github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/tcpip/network/ipv6/ipv6.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package ipv6 contains the implementation of the ipv6 network protocol.
    16  package ipv6
    17  
    18  import (
    19  	"encoding/binary"
    20  	"fmt"
    21  	"hash/fnv"
    22  	"math"
    23  	"reflect"
    24  	"sort"
    25  	"time"
    26  
    27  	"github.com/MerlinKodo/gvisor/pkg/atomicbitops"
    28  	"github.com/MerlinKodo/gvisor/pkg/buffer"
    29  	"github.com/MerlinKodo/gvisor/pkg/sync"
    30  	"github.com/MerlinKodo/gvisor/pkg/tcpip"
    31  	"github.com/MerlinKodo/gvisor/pkg/tcpip/header"
    32  	"github.com/MerlinKodo/gvisor/pkg/tcpip/header/parse"
    33  	"github.com/MerlinKodo/gvisor/pkg/tcpip/network/hash"
    34  	"github.com/MerlinKodo/gvisor/pkg/tcpip/network/internal/fragmentation"
    35  	"github.com/MerlinKodo/gvisor/pkg/tcpip/network/internal/ip"
    36  	"github.com/MerlinKodo/gvisor/pkg/tcpip/network/internal/multicast"
    37  	"github.com/MerlinKodo/gvisor/pkg/tcpip/stack"
    38  )
    39  
    40  const (
    41  	// ReassembleTimeout controls how long a fragment will be held.
    42  	// As per RFC 8200 section 4.5:
    43  	//
    44  	//   If insufficient fragments are received to complete reassembly of a packet
    45  	//   within 60 seconds of the reception of the first-arriving fragment of that
    46  	//   packet, reassembly of that packet must be abandoned.
    47  	//
    48  	// Linux also uses 60 seconds for reassembly timeout:
    49  	// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ipv6.h#L456
    50  	ReassembleTimeout = 60 * time.Second
    51  
    52  	// ProtocolNumber is the ipv6 protocol number.
    53  	ProtocolNumber = header.IPv6ProtocolNumber
    54  
    55  	// maxPayloadSize is the maximum size that can be encoded in the 16-bit
    56  	// PayloadLength field of the ipv6 header.
    57  	maxPayloadSize = 0xffff
    58  
    59  	// DefaultTTL is the default hop limit for IPv6 Packets egressed by
    60  	// Netstack.
    61  	DefaultTTL = 64
    62  
    63  	// buckets for fragment identifiers
    64  	buckets = 2048
    65  )
    66  
    67  const (
    68  	forwardingDisabled = 0
    69  	forwardingEnabled  = 1
    70  )
    71  
    72  // policyTable is the default policy table defined in RFC 6724 section 2.1.
    73  //
    74  // A more human-readable version:
    75  //
    76  //	Prefix        Precedence Label
    77  //	::1/128               50     0
    78  //	::/0                  40     1
    79  //	::ffff:0:0/96         35     4
    80  //	2002::/16             30     2
    81  //	2001::/32              5     5
    82  //	fc00::/7               3    13
    83  //	::/96                  1     3
    84  //	fec0::/10              1    11
    85  //	3ffe::/16              1    12
    86  //
    87  // The table is sorted by prefix length so longest-prefix match can be easily
    88  // achieved.
    89  //
    90  // We willingly left out ::/96, fec0::/10 and 3ffe::/16 since those prefix
    91  // assignments are deprecated.
    92  //
    93  // As per RFC 4291 section 2.5.5.1 (for ::/96),
    94  //
    95  //	The "IPv4-Compatible IPv6 address" is now deprecated because the
    96  //	current IPv6 transition mechanisms no longer use these addresses.
    97  //	New or updated implementations are not required to support this
    98  //	address type.
    99  //
   100  // As per RFC 3879 section 4 (for fec0::/10),
   101  //
   102  //	This document formally deprecates the IPv6 site-local unicast prefix
   103  //	defined in [RFC3513], i.e., 1111111011 binary or FEC0::/10.
   104  //
   105  // As per RFC 3701 section 1 (for 3ffe::/16),
   106  //
   107  //	As clearly stated in [TEST-NEW], the addresses for the 6bone are
   108  //	temporary and will be reclaimed in the future. It further states
   109  //	that all users of these addresses (within the 3FFE::/16 prefix) will
   110  //	be required to renumber at some time in the future.
   111  //
   112  // and section 2,
   113  //
   114  //	Thus after the pTLA allocation cutoff date January 1, 2004, it is
   115  //	REQUIRED that no new 6bone 3FFE pTLAs be allocated.
   116  //
   117  // MUST NOT BE MODIFIED.
   118  var policyTable = [...]struct {
   119  	subnet tcpip.Subnet
   120  
   121  	label uint8
   122  }{
   123  	// ::1/128
   124  	{
   125  		subnet: header.IPv6Loopback.WithPrefix().Subnet(),
   126  		label:  0,
   127  	},
   128  	// ::ffff:0:0/96
   129  	{
   130  		subnet: header.IPv4MappedIPv6Subnet,
   131  		label:  4,
   132  	},
   133  	// 2001::/32 (Teredo prefix as per RFC 4380 section 2.6).
   134  	{
   135  		subnet: tcpip.AddressWithPrefix{
   136  			Address:   tcpip.AddrFrom16([16]byte{0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}),
   137  			PrefixLen: 32,
   138  		}.Subnet(),
   139  		label: 5,
   140  	},
   141  	// 2002::/16 (6to4 prefix as per RFC 3056 section 2).
   142  	{
   143  		subnet: tcpip.AddressWithPrefix{
   144  			Address:   tcpip.AddrFrom16([16]byte{0x20, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}),
   145  			PrefixLen: 16,
   146  		}.Subnet(),
   147  		label: 2,
   148  	},
   149  	// fc00::/7 (Unique local addresses as per RFC 4193 section 3.1).
   150  	{
   151  		subnet: tcpip.AddressWithPrefix{
   152  			Address:   tcpip.AddrFrom16([16]byte{0xfc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}),
   153  			PrefixLen: 7,
   154  		}.Subnet(),
   155  		label: 13,
   156  	},
   157  	// ::/0
   158  	{
   159  		subnet: header.IPv6EmptySubnet,
   160  		label:  1,
   161  	},
   162  }
   163  
   164  func getLabel(addr tcpip.Address) uint8 {
   165  	for _, p := range policyTable {
   166  		if p.subnet.Contains(addr) {
   167  			return p.label
   168  		}
   169  	}
   170  
   171  	panic(fmt.Sprintf("should have a label for address = %s", addr))
   172  }
   173  
   174  var _ stack.DuplicateAddressDetector = (*endpoint)(nil)
   175  var _ stack.LinkAddressResolver = (*endpoint)(nil)
   176  var _ stack.LinkResolvableNetworkEndpoint = (*endpoint)(nil)
   177  var _ stack.ForwardingNetworkEndpoint = (*endpoint)(nil)
   178  var _ stack.MulticastForwardingNetworkEndpoint = (*endpoint)(nil)
   179  var _ stack.GroupAddressableEndpoint = (*endpoint)(nil)
   180  var _ stack.AddressableEndpoint = (*endpoint)(nil)
   181  var _ stack.NetworkEndpoint = (*endpoint)(nil)
   182  var _ stack.NDPEndpoint = (*endpoint)(nil)
   183  var _ MLDEndpoint = (*endpoint)(nil)
   184  var _ NDPEndpoint = (*endpoint)(nil)
   185  
   186  type endpoint struct {
   187  	nic        stack.NetworkInterface
   188  	dispatcher stack.TransportDispatcher
   189  	protocol   *protocol
   190  	stats      sharedStats
   191  
   192  	// enabled is set to 1 when the endpoint is enabled and 0 when it is
   193  	// disabled.
   194  	enabled atomicbitops.Uint32
   195  
   196  	// forwarding is set to forwardingEnabled when the endpoint has forwarding
   197  	// enabled and forwardingDisabled when it is disabled.
   198  	forwarding atomicbitops.Uint32
   199  
   200  	// multicastForwarding is set to forwardingEnabled when the endpoint has
   201  	// forwarding enabled and forwardingDisabled when it is disabled.
   202  	//
   203  	// TODO(https://gvisor.dev/issue/7338): Implement support for multicast
   204  	// forwarding. Currently, setting this value to true is a no-op.
   205  	multicastForwarding atomicbitops.Uint32
   206  
   207  	mu struct {
   208  		sync.RWMutex
   209  
   210  		addressableEndpointState stack.AddressableEndpointState
   211  		ndp                      ndpState
   212  		mld                      mldState
   213  	}
   214  
   215  	// dad is used to check if an arbitrary address is already assigned to some
   216  	// neighbor.
   217  	//
   218  	// Note: this is different from mu.ndp.dad which is used to perform DAD for
   219  	// addresses that are assigned to the interface. Removing an address aborts
   220  	// DAD; if we had used the same state, handlers for a removed address would
   221  	// not be called with the actual DAD result.
   222  	//
   223  	// LOCK ORDERING: mu > dad.mu.
   224  	dad struct {
   225  		mu struct {
   226  			sync.Mutex
   227  
   228  			dad ip.DAD
   229  		}
   230  	}
   231  }
   232  
   233  // NICNameFromID is a function that returns a stable name for the specified NIC,
   234  // even if different NIC IDs are used to refer to the same NIC in different
   235  // program runs. It is used when generating opaque interface identifiers (IIDs).
   236  // If the NIC was created with a name, it is passed to NICNameFromID.
   237  //
   238  // NICNameFromID SHOULD return unique NIC names so unique opaque IIDs are
   239  // generated for the same prefix on different NICs.
   240  type NICNameFromID func(tcpip.NICID, string) string
   241  
   242  // OpaqueInterfaceIdentifierOptions holds the options related to the generation
   243  // of opaque interface identifiers (IIDs) as defined by RFC 7217.
   244  type OpaqueInterfaceIdentifierOptions struct {
   245  	// NICNameFromID is a function that returns a stable name for a specified NIC,
   246  	// even if the NIC ID changes over time.
   247  	//
   248  	// Must be specified to generate the opaque IID.
   249  	NICNameFromID NICNameFromID
   250  
   251  	// SecretKey is a pseudo-random number used as the secret key when generating
   252  	// opaque IIDs as defined by RFC 7217. The key SHOULD be at least
   253  	// header.OpaqueIIDSecretKeyMinBytes bytes and MUST follow minimum randomness
   254  	// requirements for security as outlined by RFC 4086. SecretKey MUST NOT
   255  	// change between program runs, unless explicitly changed.
   256  	//
   257  	// OpaqueInterfaceIdentifierOptions takes ownership of SecretKey. SecretKey
   258  	// MUST NOT be modified after Stack is created.
   259  	//
   260  	// May be nil, but a nil value is highly discouraged to maintain
   261  	// some level of randomness between nodes.
   262  	SecretKey []byte
   263  }
   264  
   265  // CheckDuplicateAddress implements stack.DuplicateAddressDetector.
   266  func (e *endpoint) CheckDuplicateAddress(addr tcpip.Address, h stack.DADCompletionHandler) stack.DADCheckAddressDisposition {
   267  	e.dad.mu.Lock()
   268  	defer e.dad.mu.Unlock()
   269  	return e.dad.mu.dad.CheckDuplicateAddressLocked(addr, h)
   270  }
   271  
   272  // SetDADConfigurations implements stack.DuplicateAddressDetector.
   273  func (e *endpoint) SetDADConfigurations(c stack.DADConfigurations) {
   274  	e.mu.Lock()
   275  	defer e.mu.Unlock()
   276  	e.dad.mu.Lock()
   277  	defer e.dad.mu.Unlock()
   278  
   279  	e.mu.ndp.dad.SetConfigsLocked(c)
   280  	e.dad.mu.dad.SetConfigsLocked(c)
   281  }
   282  
   283  // DuplicateAddressProtocol implements stack.DuplicateAddressDetector.
   284  func (*endpoint) DuplicateAddressProtocol() tcpip.NetworkProtocolNumber {
   285  	return ProtocolNumber
   286  }
   287  
   288  // HandleLinkResolutionFailure implements stack.LinkResolvableNetworkEndpoint.
   289  func (e *endpoint) HandleLinkResolutionFailure(pkt stack.PacketBufferPtr) {
   290  	// If we are operating as a router, we should return an ICMP error to the
   291  	// original packet's sender.
   292  	if pkt.NetworkPacketInfo.IsForwardedPacket {
   293  		// TODO(gvisor.dev/issue/6005): Propagate asynchronously generated ICMP
   294  		// errors to local endpoints.
   295  		e.protocol.returnError(&icmpReasonHostUnreachable{}, pkt, false /* deliveredLocally */)
   296  		e.stats.ip.Forwarding.Errors.Increment()
   297  		e.stats.ip.Forwarding.HostUnreachable.Increment()
   298  		return
   299  	}
   300  	// handleControl expects the entire offending packet to be in the packet
   301  	// buffer's data field.
   302  	pkt = stack.NewPacketBuffer(stack.PacketBufferOptions{
   303  		Payload: pkt.ToBuffer(),
   304  	})
   305  	defer pkt.DecRef()
   306  	pkt.NICID = e.nic.ID()
   307  	pkt.NetworkProtocolNumber = ProtocolNumber
   308  	e.handleControl(&icmpv6DestinationAddressUnreachableSockError{}, pkt)
   309  }
   310  
   311  // onAddressAssignedLocked handles an address being assigned.
   312  //
   313  // Precondition: e.mu must be exclusively locked.
   314  func (e *endpoint) onAddressAssignedLocked(addr tcpip.Address) {
   315  	// As per RFC 2710 section 3,
   316  	//
   317  	//   All MLD  messages described in this document are sent with a link-local
   318  	//   IPv6 Source Address, ...
   319  	//
   320  	// If we just completed DAD for a link-local address, then attempt to send any
   321  	// queued MLD reports. Note, we may have sent reports already for some of the
   322  	// groups before we had a valid link-local address to use as the source for
   323  	// the MLD messages, but that was only so that MLD snooping switches are aware
   324  	// of our membership to groups - routers would not have handled those reports.
   325  	//
   326  	// As per RFC 3590 section 4,
   327  	//
   328  	//   MLD Report and Done messages are sent with a link-local address as
   329  	//   the IPv6 source address, if a valid address is available on the
   330  	//   interface. If a valid link-local address is not available (e.g., one
   331  	//   has not been configured), the message is sent with the unspecified
   332  	//   address (::) as the IPv6 source address.
   333  	//
   334  	//   Once a valid link-local address is available, a node SHOULD generate
   335  	//   new MLD Report messages for all multicast addresses joined on the
   336  	//   interface.
   337  	//
   338  	//   Routers receiving an MLD Report or Done message with the unspecified
   339  	//   address as the IPv6 source address MUST silently discard the packet
   340  	//   without taking any action on the packets contents.
   341  	//
   342  	//   Snooping switches MUST manage multicast forwarding state based on MLD
   343  	//   Report and Done messages sent with the unspecified address as the
   344  	//   IPv6 source address.
   345  	if header.IsV6LinkLocalUnicastAddress(addr) {
   346  		e.mu.mld.sendQueuedReports()
   347  	}
   348  }
   349  
   350  // InvalidateDefaultRouter implements stack.NDPEndpoint.
   351  func (e *endpoint) InvalidateDefaultRouter(rtr tcpip.Address) {
   352  	e.mu.Lock()
   353  	defer e.mu.Unlock()
   354  
   355  	// We represent default routers with a default (off-link) route through the
   356  	// router.
   357  	e.mu.ndp.invalidateOffLinkRoute(offLinkRoute{dest: header.IPv6EmptySubnet, router: rtr})
   358  }
   359  
   360  // SetMLDVersion implements MLDEndpoint.
   361  func (e *endpoint) SetMLDVersion(v MLDVersion) MLDVersion {
   362  	e.mu.Lock()
   363  	defer e.mu.Unlock()
   364  	return e.mu.mld.setVersion(v)
   365  }
   366  
   367  // GetMLDVersion implements MLDEndpoint.
   368  func (e *endpoint) GetMLDVersion() MLDVersion {
   369  	e.mu.RLock()
   370  	defer e.mu.RUnlock()
   371  	return e.mu.mld.getVersion()
   372  }
   373  
   374  // SetNDPConfigurations implements NDPEndpoint.
   375  func (e *endpoint) SetNDPConfigurations(c NDPConfigurations) {
   376  	c.validate()
   377  	e.mu.Lock()
   378  	defer e.mu.Unlock()
   379  	e.mu.ndp.configs = c
   380  }
   381  
   382  // hasTentativeAddr returns true if addr is tentative on e.
   383  func (e *endpoint) hasTentativeAddr(addr tcpip.Address) bool {
   384  	e.mu.RLock()
   385  	addressEndpoint := e.getAddressRLocked(addr)
   386  	e.mu.RUnlock()
   387  	return addressEndpoint != nil && addressEndpoint.GetKind() == stack.PermanentTentative
   388  }
   389  
   390  // dupTentativeAddrDetected attempts to inform e that a tentative addr is a
   391  // duplicate on a link.
   392  //
   393  // dupTentativeAddrDetected removes the tentative address if it exists. If the
   394  // address was generated via SLAAC, an attempt is made to generate a new
   395  // address.
   396  func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address, holderLinkAddr tcpip.LinkAddress, nonce []byte) tcpip.Error {
   397  	e.mu.Lock()
   398  	defer e.mu.Unlock()
   399  
   400  	addressEndpoint := e.getAddressRLocked(addr)
   401  	if addressEndpoint == nil {
   402  		return &tcpip.ErrBadAddress{}
   403  	}
   404  
   405  	if addressEndpoint.GetKind() != stack.PermanentTentative {
   406  		return &tcpip.ErrInvalidEndpointState{}
   407  	}
   408  
   409  	switch result := e.mu.ndp.dad.ExtendIfNonceEqualLocked(addr, nonce); result {
   410  	case ip.Extended:
   411  		// The nonce we got back was the same we sent so we know the message
   412  		// indicating a duplicate address was likely ours so do not consider
   413  		// the address duplicate here.
   414  		return nil
   415  	case ip.AlreadyExtended:
   416  		// See Extended.
   417  		//
   418  		// Our DAD message was looped back already.
   419  		return nil
   420  	case ip.NoDADStateFound:
   421  		panic(fmt.Sprintf("expected DAD state for tentative address %s", addr))
   422  	case ip.NonceDisabled:
   423  		// If nonce is disabled then we have no way to know if the packet was
   424  		// looped-back so we have to assume it indicates a duplicate address.
   425  		fallthrough
   426  	case ip.NonceNotEqual:
   427  		// If the address is a SLAAC address, do not invalidate its SLAAC prefix as an
   428  		// attempt will be made to generate a new address for it.
   429  		if err := e.removePermanentEndpointLocked(addressEndpoint, false /* allowSLAACInvalidation */, stack.AddressRemovalDADFailed, &stack.DADDupAddrDetected{HolderLinkAddress: holderLinkAddr}); err != nil {
   430  			return err
   431  		}
   432  
   433  		prefix := addressEndpoint.Subnet()
   434  
   435  		switch t := addressEndpoint.ConfigType(); t {
   436  		case stack.AddressConfigStatic:
   437  		case stack.AddressConfigSlaac:
   438  			if addressEndpoint.Temporary() {
   439  				// Do not reset the generation attempts counter for the prefix as the
   440  				// temporary address is being regenerated in response to a DAD conflict.
   441  				e.mu.ndp.regenerateTempSLAACAddr(prefix, false /* resetGenAttempts */)
   442  			} else {
   443  				e.mu.ndp.regenerateSLAACAddr(prefix)
   444  			}
   445  		default:
   446  			panic(fmt.Sprintf("unrecognized address config type = %d", t))
   447  		}
   448  
   449  		return nil
   450  	default:
   451  		panic(fmt.Sprintf("unhandled result = %d", result))
   452  	}
   453  }
   454  
   455  // Forwarding implements stack.ForwardingNetworkEndpoint.
   456  func (e *endpoint) Forwarding() bool {
   457  	return e.forwarding.Load() == forwardingEnabled
   458  }
   459  
   460  // setForwarding sets the forwarding status for the endpoint.
   461  //
   462  // Returns the previous forwarding status.
   463  func (e *endpoint) setForwarding(v bool) bool {
   464  	forwarding := uint32(forwardingDisabled)
   465  	if v {
   466  		forwarding = forwardingEnabled
   467  	}
   468  
   469  	return e.forwarding.Swap(forwarding) != forwardingDisabled
   470  }
   471  
   472  // SetForwarding implements stack.ForwardingNetworkEndpoint.
   473  func (e *endpoint) SetForwarding(forwarding bool) bool {
   474  	e.mu.Lock()
   475  	defer e.mu.Unlock()
   476  
   477  	prevForwarding := e.setForwarding(forwarding)
   478  	if prevForwarding == forwarding {
   479  		return prevForwarding
   480  	}
   481  
   482  	allRoutersGroups := [...]tcpip.Address{
   483  		header.IPv6AllRoutersInterfaceLocalMulticastAddress,
   484  		header.IPv6AllRoutersLinkLocalMulticastAddress,
   485  		header.IPv6AllRoutersSiteLocalMulticastAddress,
   486  	}
   487  
   488  	if forwarding {
   489  		// As per RFC 4291 section 2.8:
   490  		//
   491  		//   A router is required to recognize all addresses that a host is
   492  		//   required to recognize, plus the following addresses as identifying
   493  		//   itself:
   494  		//
   495  		//      o The All-Routers multicast addresses defined in Section 2.7.1.
   496  		//
   497  		// As per RFC 4291 section 2.7.1,
   498  		//
   499  		//      All Routers Addresses:   FF01:0:0:0:0:0:0:2
   500  		//                               FF02:0:0:0:0:0:0:2
   501  		//                               FF05:0:0:0:0:0:0:2
   502  		//
   503  		//   The above multicast addresses identify the group of all IPv6 routers,
   504  		//   within scope 1 (interface-local), 2 (link-local), or 5 (site-local).
   505  		for _, g := range allRoutersGroups {
   506  			if err := e.joinGroupLocked(g); err != nil {
   507  				// joinGroupLocked only returns an error if the group address is not a
   508  				// valid IPv6 multicast address.
   509  				panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", g, err))
   510  			}
   511  		}
   512  	} else {
   513  		for _, g := range allRoutersGroups {
   514  			switch err := e.leaveGroupLocked(g).(type) {
   515  			case nil:
   516  			case *tcpip.ErrBadLocalAddress:
   517  				// The endpoint may have already left the multicast group.
   518  			default:
   519  				panic(fmt.Sprintf("e.leaveGroupLocked(%s): %s", g, err))
   520  			}
   521  		}
   522  	}
   523  
   524  	e.mu.ndp.forwardingChanged(forwarding)
   525  	return prevForwarding
   526  }
   527  
   528  // MulticastForwarding implements stack.MulticastForwardingNetworkEndpoint.
   529  func (e *endpoint) MulticastForwarding() bool {
   530  	return e.multicastForwarding.Load() == forwardingEnabled
   531  }
   532  
   533  // SetMulticastForwarding implements stack.MulticastForwardingNetworkEndpoint.
   534  func (e *endpoint) SetMulticastForwarding(forwarding bool) bool {
   535  	updatedForwarding := uint32(forwardingDisabled)
   536  	if forwarding {
   537  		updatedForwarding = forwardingEnabled
   538  	}
   539  
   540  	return e.multicastForwarding.Swap(updatedForwarding) != forwardingDisabled
   541  }
   542  
   543  // Enable implements stack.NetworkEndpoint.
   544  func (e *endpoint) Enable() tcpip.Error {
   545  	e.mu.Lock()
   546  	defer e.mu.Unlock()
   547  
   548  	// If the NIC is not enabled, the endpoint can't do anything meaningful so
   549  	// don't enable the endpoint.
   550  	if !e.nic.Enabled() {
   551  		return &tcpip.ErrNotPermitted{}
   552  	}
   553  
   554  	// If the endpoint is already enabled, there is nothing for it to do.
   555  	if !e.setEnabled(true) {
   556  		return nil
   557  	}
   558  
   559  	// Perform DAD on the all the unicast IPv6 endpoints that are in the permanent
   560  	// state.
   561  	//
   562  	// Addresses may have already completed DAD but in the time since the endpoint
   563  	// was last enabled, other devices may have acquired the same addresses.
   564  	var err tcpip.Error
   565  	e.mu.addressableEndpointState.ForEachEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
   566  		addr := addressEndpoint.AddressWithPrefix().Address
   567  		if !header.IsV6UnicastAddress(addr) {
   568  			return true
   569  		}
   570  
   571  		switch kind := addressEndpoint.GetKind(); kind {
   572  		case stack.Permanent:
   573  			addressEndpoint.SetKind(stack.PermanentTentative)
   574  			fallthrough
   575  		case stack.PermanentTentative:
   576  			err = e.mu.ndp.startDuplicateAddressDetection(addr, addressEndpoint)
   577  			return err == nil
   578  		case stack.Temporary, stack.PermanentExpired:
   579  			return true
   580  		default:
   581  			panic(fmt.Sprintf("address %s has unknown kind %d", addressEndpoint.AddressWithPrefix(), kind))
   582  		}
   583  	})
   584  	// It is important to enable after starting DAD on all the addresses so that
   585  	// if DAD is disabled, the Tentative state is not observed.
   586  	//
   587  	// Must be called after Enabled has been set.
   588  	e.mu.addressableEndpointState.OnNetworkEndpointEnabledChanged()
   589  	if err != nil {
   590  		return err
   591  	}
   592  
   593  	// Groups may have been joined when the endpoint was disabled, or the
   594  	// endpoint may have left groups from the perspective of MLD when the
   595  	// endpoint was disabled. Either way, we need to let routers know to
   596  	// send us multicast traffic.
   597  	e.mu.mld.initializeAll()
   598  
   599  	// Join the IPv6 All-Nodes Multicast group if the stack is configured to
   600  	// use IPv6. This is required to ensure that this node properly receives
   601  	// and responds to the various NDP messages that are destined to the
   602  	// all-nodes multicast address. An example is the Neighbor Advertisement
   603  	// when we perform Duplicate Address Detection, or Router Advertisement
   604  	// when we do Router Discovery. See RFC 4862, section 5.4.2 and RFC 4861
   605  	// section 4.2 for more information.
   606  	//
   607  	// Also auto-generate an IPv6 link-local address based on the endpoint's
   608  	// link address if it is configured to do so. Note, each interface is
   609  	// required to have IPv6 link-local unicast address, as per RFC 4291
   610  	// section 2.1.
   611  
   612  	// Join the All-Nodes multicast group before starting DAD as responses to DAD
   613  	// (NDP NS) messages may be sent to the All-Nodes multicast group if the
   614  	// source address of the NDP NS is the unspecified address, as per RFC 4861
   615  	// section 7.2.4.
   616  	if err := e.joinGroupLocked(header.IPv6AllNodesMulticastAddress); err != nil {
   617  		// joinGroupLocked only returns an error if the group address is not a valid
   618  		// IPv6 multicast address.
   619  		panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv6AllNodesMulticastAddress, err))
   620  	}
   621  
   622  	// Do not auto-generate an IPv6 link-local address for loopback devices.
   623  	if e.protocol.options.AutoGenLinkLocal && !e.nic.IsLoopback() {
   624  		// The valid and preferred lifetime is infinite for the auto-generated
   625  		// link-local address.
   626  		e.mu.ndp.doSLAAC(header.IPv6LinkLocalPrefix.Subnet(), header.NDPInfiniteLifetime, header.NDPInfiniteLifetime)
   627  	}
   628  
   629  	e.mu.ndp.startSolicitingRouters()
   630  	return nil
   631  }
   632  
   633  // Enabled implements stack.NetworkEndpoint.
   634  func (e *endpoint) Enabled() bool {
   635  	return e.nic.Enabled() && e.isEnabled()
   636  }
   637  
   638  // isEnabled returns true if the endpoint is enabled, regardless of the
   639  // enabled status of the NIC.
   640  func (e *endpoint) isEnabled() bool {
   641  	return e.enabled.Load() == 1
   642  }
   643  
   644  // setEnabled sets the enabled status for the endpoint.
   645  //
   646  // Returns true if the enabled status was updated.
   647  func (e *endpoint) setEnabled(v bool) bool {
   648  	if v {
   649  		return e.enabled.Swap(1) == 0
   650  	}
   651  	return e.enabled.Swap(0) == 1
   652  }
   653  
   654  // Disable implements stack.NetworkEndpoint.
   655  func (e *endpoint) Disable() {
   656  	e.mu.Lock()
   657  	defer e.mu.Unlock()
   658  	e.disableLocked()
   659  }
   660  
   661  func (e *endpoint) disableLocked() {
   662  	if !e.Enabled() {
   663  		return
   664  	}
   665  
   666  	e.mu.ndp.stopSolicitingRouters()
   667  	e.mu.ndp.cleanupState()
   668  
   669  	// The endpoint may have already left the multicast group.
   670  	switch err := e.leaveGroupLocked(header.IPv6AllNodesMulticastAddress).(type) {
   671  	case nil, *tcpip.ErrBadLocalAddress:
   672  	default:
   673  		panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv6AllNodesMulticastAddress, err))
   674  	}
   675  
   676  	// Leave groups from the perspective of MLD so that routers know that
   677  	// we are no longer interested in the group.
   678  	e.mu.mld.softLeaveAll()
   679  
   680  	// Stop DAD for all the tentative unicast addresses.
   681  	e.mu.addressableEndpointState.ForEachEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
   682  		addrWithPrefix := addressEndpoint.AddressWithPrefix()
   683  		switch kind := addressEndpoint.GetKind(); kind {
   684  		case stack.Permanent, stack.PermanentTentative:
   685  			if header.IsV6UnicastAddress(addrWithPrefix.Address) {
   686  				e.mu.ndp.stopDuplicateAddressDetection(addrWithPrefix.Address, &stack.DADAborted{})
   687  			}
   688  		case stack.Temporary, stack.PermanentExpired:
   689  		default:
   690  			panic(fmt.Sprintf("address %s has unknown address kind %d", addrWithPrefix, kind))
   691  		}
   692  		return true
   693  	})
   694  
   695  	if !e.setEnabled(false) {
   696  		panic("should have only done work to disable the endpoint if it was enabled")
   697  	}
   698  
   699  	// Must be called after Enabled has been set.
   700  	e.mu.addressableEndpointState.OnNetworkEndpointEnabledChanged()
   701  }
   702  
   703  // DefaultTTL is the default hop limit for this endpoint.
   704  func (e *endpoint) DefaultTTL() uint8 {
   705  	return e.protocol.DefaultTTL()
   706  }
   707  
   708  // MTU implements stack.NetworkEndpoint. It returns the link-layer MTU minus the
   709  // network layer max header length.
   710  func (e *endpoint) MTU() uint32 {
   711  	networkMTU, err := calculateNetworkMTU(e.nic.MTU(), header.IPv6MinimumSize)
   712  	if err != nil {
   713  		return 0
   714  	}
   715  	return networkMTU
   716  }
   717  
   718  // MaxHeaderLength returns the maximum length needed by ipv6 headers (and
   719  // underlying protocols).
   720  func (e *endpoint) MaxHeaderLength() uint16 {
   721  	// TODO(gvisor.dev/issues/5035): The maximum header length returned here does
   722  	// not open the possibility for the caller to know about size required for
   723  	// extension headers.
   724  	return e.nic.MaxHeaderLength() + header.IPv6MinimumSize
   725  }
   726  
   727  func addIPHeader(srcAddr, dstAddr tcpip.Address, pkt stack.PacketBufferPtr, params stack.NetworkHeaderParams, extensionHeaders header.IPv6ExtHdrSerializer) tcpip.Error {
   728  	extHdrsLen := extensionHeaders.Length()
   729  	length := pkt.Size() + extensionHeaders.Length()
   730  	if length > math.MaxUint16 {
   731  		return &tcpip.ErrMessageTooLong{}
   732  	}
   733  	header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize + extHdrsLen)).Encode(&header.IPv6Fields{
   734  		PayloadLength:     uint16(length),
   735  		TransportProtocol: params.Protocol,
   736  		HopLimit:          params.TTL,
   737  		TrafficClass:      params.TOS,
   738  		SrcAddr:           srcAddr,
   739  		DstAddr:           dstAddr,
   740  		ExtensionHeaders:  extensionHeaders,
   741  	})
   742  	pkt.NetworkProtocolNumber = ProtocolNumber
   743  	return nil
   744  }
   745  
   746  func packetMustBeFragmented(pkt stack.PacketBufferPtr, networkMTU uint32) bool {
   747  	payload := len(pkt.TransportHeader().Slice()) + pkt.Data().Size()
   748  	return pkt.GSOOptions.Type == stack.GSONone && uint32(payload) > networkMTU
   749  }
   750  
   751  // handleFragments fragments pkt and calls the handler function on each
   752  // fragment. It returns the number of fragments handled and the number of
   753  // fragments left to be processed. The IP header must already be present in the
   754  // original packet. The transport header protocol number is required to avoid
   755  // parsing the IPv6 extension headers.
   756  func (e *endpoint) handleFragments(r *stack.Route, networkMTU uint32, pkt stack.PacketBufferPtr, transProto tcpip.TransportProtocolNumber, handler func(stack.PacketBufferPtr) tcpip.Error) (int, int, tcpip.Error) {
   757  	networkHeader := header.IPv6(pkt.NetworkHeader().Slice())
   758  
   759  	// TODO(gvisor.dev/issue/3912): Once the Authentication or ESP Headers are
   760  	// supported for outbound packets, their length should not affect the fragment
   761  	// maximum payload length because they should only be transmitted once.
   762  	fragmentPayloadLen := (networkMTU - header.IPv6FragmentHeaderSize) &^ 7
   763  	if fragmentPayloadLen < header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit {
   764  		// We need at least 8 bytes of space left for the fragmentable part because
   765  		// the fragment payload must obviously be non-zero and must be a multiple
   766  		// of 8 as per RFC 8200 section 4.5:
   767  		//   Each complete fragment, except possibly the last ("rightmost") one, is
   768  		//   an integer multiple of 8 octets long.
   769  		return 0, 1, &tcpip.ErrMessageTooLong{}
   770  	}
   771  
   772  	if fragmentPayloadLen < uint32(len(pkt.TransportHeader().Slice())) {
   773  		// As per RFC 8200 Section 4.5, the Transport Header is expected to be small
   774  		// enough to fit in the first fragment.
   775  		return 0, 1, &tcpip.ErrMessageTooLong{}
   776  	}
   777  
   778  	pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadLen, calculateFragmentReserve(pkt))
   779  	defer pf.Release()
   780  	id := e.protocol.ids[hashRoute(r, e.protocol.hashIV)%buckets].Add(1)
   781  
   782  	var n int
   783  	for {
   784  		fragPkt, more := buildNextFragment(&pf, networkHeader, transProto, id)
   785  		err := handler(fragPkt)
   786  		fragPkt.DecRef()
   787  		if err != nil {
   788  			return n, pf.RemainingFragmentCount() + 1, err
   789  		}
   790  		n++
   791  		if !more {
   792  			return n, pf.RemainingFragmentCount(), nil
   793  		}
   794  	}
   795  }
   796  
   797  // WritePacket writes a packet to the given destination address and protocol.
   798  func (e *endpoint) WritePacket(r *stack.Route, params stack.NetworkHeaderParams, pkt stack.PacketBufferPtr) tcpip.Error {
   799  	dstAddr := r.RemoteAddress()
   800  	if err := addIPHeader(r.LocalAddress(), dstAddr, pkt, params, nil /* extensionHeaders */); err != nil {
   801  		return err
   802  	}
   803  
   804  	// iptables filtering. All packets that reach here are locally
   805  	// generated.
   806  	outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
   807  	if ok := e.protocol.stack.IPTables().CheckOutput(pkt, r, outNicName); !ok {
   808  		// iptables is telling us to drop the packet.
   809  		e.stats.ip.IPTablesOutputDropped.Increment()
   810  		return nil
   811  	}
   812  
   813  	// If the packet is manipulated as per DNAT Output rules, handle packet
   814  	// based on destination address and do not send the packet to link
   815  	// layer.
   816  	//
   817  	// We should do this for every packet, rather than only DNATted packets, but
   818  	// removing this check short circuits broadcasts before they are sent out to
   819  	// other hosts.
   820  	if netHeader := header.IPv6(pkt.NetworkHeader().Slice()); dstAddr != netHeader.DestinationAddress() {
   821  		if ep := e.protocol.findEndpointWithAddress(netHeader.DestinationAddress()); ep != nil {
   822  			// Since we rewrote the packet but it is being routed back to us, we
   823  			// can safely assume the checksum is valid.
   824  			ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */)
   825  			return nil
   826  		}
   827  	}
   828  
   829  	return e.writePacket(r, pkt, params.Protocol, false /* headerIncluded */)
   830  }
   831  
   832  func (e *endpoint) writePacket(r *stack.Route, pkt stack.PacketBufferPtr, protocol tcpip.TransportProtocolNumber, headerIncluded bool) tcpip.Error {
   833  	if r.Loop()&stack.PacketLoop != 0 {
   834  		// If the packet was generated by the stack (not a raw/packet endpoint
   835  		// where a packet may be written with the header included), then we can
   836  		// safely assume the checksum is valid.
   837  		e.handleLocalPacket(pkt, !headerIncluded /* canSkipRXChecksum */)
   838  	}
   839  	if r.Loop()&stack.PacketOut == 0 {
   840  		return nil
   841  	}
   842  
   843  	// Postrouting NAT can only change the source address, and does not alter the
   844  	// route or outgoing interface of the packet.
   845  	outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
   846  	if ok := e.protocol.stack.IPTables().CheckPostrouting(pkt, r, e, outNicName); !ok {
   847  		// iptables is telling us to drop the packet.
   848  		e.stats.ip.IPTablesPostroutingDropped.Increment()
   849  		return nil
   850  	}
   851  
   852  	stats := e.stats.ip
   853  	networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(len(pkt.NetworkHeader().Slice())))
   854  	if err != nil {
   855  		stats.OutgoingPacketErrors.Increment()
   856  		return err
   857  	}
   858  
   859  	if packetMustBeFragmented(pkt, networkMTU) {
   860  		if pkt.NetworkPacketInfo.IsForwardedPacket {
   861  			// As per RFC 2460, section 4.5:
   862  			//   Unlike IPv4, fragmentation in IPv6 is performed only by source nodes,
   863  			//   not by routers along a packet's delivery path.
   864  			return &tcpip.ErrMessageTooLong{}
   865  		}
   866  		sent, remain, err := e.handleFragments(r, networkMTU, pkt, protocol, func(fragPkt stack.PacketBufferPtr) tcpip.Error {
   867  			// TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each
   868  			// fragment one by one using WritePacket() (current strategy) or if we
   869  			// want to create a PacketBufferList from the fragments and feed it to
   870  			// WritePackets(). It'll be faster but cost more memory.
   871  			return e.nic.WritePacket(r, fragPkt)
   872  		})
   873  		stats.PacketsSent.IncrementBy(uint64(sent))
   874  		stats.OutgoingPacketErrors.IncrementBy(uint64(remain))
   875  		return err
   876  	}
   877  
   878  	if err := e.nic.WritePacket(r, pkt); err != nil {
   879  		stats.OutgoingPacketErrors.Increment()
   880  		return err
   881  	}
   882  
   883  	stats.PacketsSent.Increment()
   884  	return nil
   885  }
   886  
   887  // WriteHeaderIncludedPacket implements stack.NetworkEndpoint.
   888  func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBufferPtr) tcpip.Error {
   889  	// The packet already has an IP header, but there are a few required checks.
   890  	h, ok := pkt.Data().PullUp(header.IPv6MinimumSize)
   891  	if !ok {
   892  		return &tcpip.ErrMalformedHeader{}
   893  	}
   894  	ipH := header.IPv6(h)
   895  
   896  	// Always set the payload length.
   897  	pktSize := pkt.Data().Size()
   898  	ipH.SetPayloadLength(uint16(pktSize - header.IPv6MinimumSize))
   899  
   900  	// Set the source address when zero.
   901  	if ipH.SourceAddress() == header.IPv6Any {
   902  		ipH.SetSourceAddress(r.LocalAddress())
   903  	}
   904  
   905  	// Populate the packet buffer's network header and don't allow an invalid
   906  	// packet to be sent.
   907  	//
   908  	// Note that parsing only makes sure that the packet is well formed as per the
   909  	// wire format. We also want to check if the header's fields are valid before
   910  	// sending the packet.
   911  	proto, _, _, _, ok := parse.IPv6(pkt)
   912  	if !ok || !header.IPv6(pkt.NetworkHeader().Slice()).IsValid(pktSize) {
   913  		return &tcpip.ErrMalformedHeader{}
   914  	}
   915  
   916  	return e.writePacket(r, pkt, proto, true /* headerIncluded */)
   917  }
   918  
   919  func validateAddressesForForwarding(h header.IPv6) ip.ForwardingError {
   920  	srcAddr := h.SourceAddress()
   921  
   922  	// As per RFC 4291 section 2.5.2,
   923  	//
   924  	//   The address 0:0:0:0:0:0:0:0 is called the unspecified address. It
   925  	//   must never be assigned to any node. It indicates the absence of an
   926  	//   address. One example of its use is in the Source Address field of
   927  	//   any IPv6 packets sent by an initializing host before it has learned
   928  	//   its own address.
   929  	//
   930  	//   The unspecified address must not be used as the destination address
   931  	//   of IPv6 packets or in IPv6 Routing headers. An IPv6 packet with a
   932  	//   source address of unspecified must never be forwarded by an IPv6
   933  	//   router.
   934  	if srcAddr.Unspecified() {
   935  		return &ip.ErrInitializingSourceAddress{}
   936  	}
   937  
   938  	// As per RFC 4291 section 2.5.6,
   939  	//
   940  	//   Routers must not forward any packets with Link-Local source or
   941  	//   destination addresses to other links.
   942  	if header.IsV6LinkLocalUnicastAddress(srcAddr) {
   943  		return &ip.ErrLinkLocalSourceAddress{}
   944  	}
   945  
   946  	if dstAddr := h.DestinationAddress(); header.IsV6LinkLocalUnicastAddress(dstAddr) || header.IsV6LinkLocalMulticastAddress(dstAddr) {
   947  		return &ip.ErrLinkLocalDestinationAddress{}
   948  	}
   949  	return nil
   950  }
   951  
   952  // forwardUnicastPacket attempts to forward a unicast packet to its final
   953  // destination.
   954  func (e *endpoint) forwardUnicastPacket(pkt stack.PacketBufferPtr) ip.ForwardingError {
   955  	h := header.IPv6(pkt.NetworkHeader().Slice())
   956  
   957  	if err := validateAddressesForForwarding(h); err != nil {
   958  		return err
   959  	}
   960  
   961  	hopLimit := h.HopLimit()
   962  	if hopLimit <= 1 {
   963  		// As per RFC 4443 section 3.3,
   964  		//
   965  		//   If a router receives a packet with a Hop Limit of zero, or if a
   966  		//   router decrements a packet's Hop Limit to zero, it MUST discard the
   967  		//   packet and originate an ICMPv6 Time Exceeded message with Code 0 to
   968  		//   the source of the packet.  This indicates either a routing loop or
   969  		//   too small an initial Hop Limit value.
   970  		//
   971  		// We return the original error rather than the result of returning
   972  		// the ICMP packet because the original error is more relevant to
   973  		// the caller.
   974  		_ = e.protocol.returnError(&icmpReasonHopLimitExceeded{}, pkt, false /* deliveredLocally */)
   975  		return &ip.ErrTTLExceeded{}
   976  	}
   977  
   978  	stk := e.protocol.stack
   979  
   980  	dstAddr := h.DestinationAddress()
   981  
   982  	// Check if the destination is owned by the stack.
   983  	if ep := e.protocol.findEndpointWithAddress(dstAddr); ep != nil {
   984  		inNicName := stk.FindNICNameFromID(e.nic.ID())
   985  		outNicName := stk.FindNICNameFromID(ep.nic.ID())
   986  		if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok {
   987  			// iptables is telling us to drop the packet.
   988  			e.stats.ip.IPTablesForwardDropped.Increment()
   989  			return nil
   990  		}
   991  
   992  		// The packet originally arrived on e so provide its NIC as the input NIC.
   993  		ep.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */)
   994  		return nil
   995  	}
   996  
   997  	// Check extension headers for any errors requiring action during forwarding.
   998  	if err := e.processExtensionHeaders(h, pkt, true /* forwarding */); err != nil {
   999  		return &ip.ErrParameterProblem{}
  1000  	}
  1001  
  1002  	r, err := stk.FindRoute(0, tcpip.Address{}, dstAddr, ProtocolNumber, false /* multicastLoop */)
  1003  	switch err.(type) {
  1004  	case nil:
  1005  	// TODO(https://gvisor.dev/issues/8105): We should not observe ErrHostUnreachable from route
  1006  	// lookups.
  1007  	case *tcpip.ErrHostUnreachable, *tcpip.ErrNetworkUnreachable:
  1008  		// We return the original error rather than the result of returning the
  1009  		// ICMP packet because the original error is more relevant to the caller.
  1010  		_ = e.protocol.returnError(&icmpReasonNetUnreachable{}, pkt, false /* deliveredLocally */)
  1011  		return &ip.ErrHostUnreachable{}
  1012  	default:
  1013  		return &ip.ErrOther{Err: err}
  1014  	}
  1015  	defer r.Release()
  1016  
  1017  	return e.forwardPacketWithRoute(r, pkt)
  1018  }
  1019  
  1020  // forwardPacketWithRoute emits the pkt using the provided route.
  1021  //
  1022  // This method should be invoked by the endpoint that received the pkt.
  1023  func (e *endpoint) forwardPacketWithRoute(route *stack.Route, pkt stack.PacketBufferPtr) ip.ForwardingError {
  1024  	h := header.IPv6(pkt.NetworkHeader().Slice())
  1025  	stk := e.protocol.stack
  1026  
  1027  	inNicName := stk.FindNICNameFromID(e.nic.ID())
  1028  	outNicName := stk.FindNICNameFromID(route.NICID())
  1029  	if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok {
  1030  		// iptables is telling us to drop the packet.
  1031  		e.stats.ip.IPTablesForwardDropped.Increment()
  1032  		return nil
  1033  	}
  1034  
  1035  	hopLimit := h.HopLimit()
  1036  
  1037  	// We need to do a deep copy of the IP packet because
  1038  	// WriteHeaderIncludedPacket takes ownership of the packet buffer, but we do
  1039  	// not own it.
  1040  	newPkt := pkt.DeepCopyForForwarding(int(route.MaxHeaderLength()))
  1041  	defer newPkt.DecRef()
  1042  	newHdr := header.IPv6(newPkt.NetworkHeader().Slice())
  1043  
  1044  	// As per RFC 8200 section 3,
  1045  	//
  1046  	//   Hop Limit           8-bit unsigned integer. Decremented by 1 by
  1047  	//                       each node that forwards the packet.
  1048  	newHdr.SetHopLimit(hopLimit - 1)
  1049  
  1050  	forwardToEp, ok := e.protocol.getEndpointForNIC(route.NICID())
  1051  	if !ok {
  1052  		// The interface was removed after we obtained the route.
  1053  		return &ip.ErrUnknownOutputEndpoint{}
  1054  	}
  1055  
  1056  	switch err := forwardToEp.writePacket(route, newPkt, newPkt.TransportProtocolNumber, true /* headerIncluded */); err.(type) {
  1057  	case nil:
  1058  		return nil
  1059  	case *tcpip.ErrMessageTooLong:
  1060  		// As per RFC 4443, section 3.2:
  1061  		//   A Packet Too Big MUST be sent by a router in response to a packet that
  1062  		//   it cannot forward because the packet is larger than the MTU of the
  1063  		//   outgoing link.
  1064  		_ = e.protocol.returnError(&icmpReasonPacketTooBig{}, pkt, false /* deliveredLocally */)
  1065  		return &ip.ErrMessageTooLong{}
  1066  	case *tcpip.ErrNoBufferSpace:
  1067  		return &ip.ErrOutgoingDeviceNoBufferSpace{}
  1068  	default:
  1069  		return &ip.ErrOther{Err: err}
  1070  	}
  1071  }
  1072  
  1073  // HandlePacket is called by the link layer when new ipv6 packets arrive for
  1074  // this endpoint.
  1075  func (e *endpoint) HandlePacket(pkt stack.PacketBufferPtr) {
  1076  	stats := e.stats.ip
  1077  
  1078  	stats.PacketsReceived.Increment()
  1079  
  1080  	if !e.isEnabled() {
  1081  		stats.DisabledPacketsReceived.Increment()
  1082  		return
  1083  	}
  1084  
  1085  	hView, ok := e.protocol.parseAndValidate(pkt)
  1086  	if !ok {
  1087  		stats.MalformedPacketsReceived.Increment()
  1088  		return
  1089  	}
  1090  	defer hView.Release()
  1091  	h := header.IPv6(hView.AsSlice())
  1092  
  1093  	if !checkV4Mapped(h, stats) {
  1094  		return
  1095  	}
  1096  
  1097  	if !e.nic.IsLoopback() {
  1098  		if !e.protocol.options.AllowExternalLoopbackTraffic {
  1099  			if header.IsV6LoopbackAddress(h.SourceAddress()) {
  1100  				stats.InvalidSourceAddressesReceived.Increment()
  1101  				return
  1102  			}
  1103  
  1104  			if header.IsV6LoopbackAddress(h.DestinationAddress()) {
  1105  				stats.InvalidDestinationAddressesReceived.Increment()
  1106  				return
  1107  			}
  1108  		}
  1109  
  1110  		if e.protocol.stack.HandleLocal() {
  1111  			addressEndpoint := e.AcquireAssignedAddress(header.IPv6(pkt.NetworkHeader().Slice()).SourceAddress(), e.nic.Promiscuous(), stack.CanBePrimaryEndpoint)
  1112  			if addressEndpoint != nil {
  1113  				addressEndpoint.DecRef()
  1114  
  1115  				// The source address is one of our own, so we never should have gotten
  1116  				// a packet like this unless HandleLocal is false or our NIC is the
  1117  				// loopback interface.
  1118  				stats.InvalidSourceAddressesReceived.Increment()
  1119  				return
  1120  			}
  1121  		}
  1122  
  1123  		// Loopback traffic skips the prerouting chain.
  1124  		inNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
  1125  		if ok := e.protocol.stack.IPTables().CheckPrerouting(pkt, e, inNicName); !ok {
  1126  			// iptables is telling us to drop the packet.
  1127  			stats.IPTablesPreroutingDropped.Increment()
  1128  			return
  1129  		}
  1130  	}
  1131  
  1132  	e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */)
  1133  }
  1134  
  1135  // handleLocalPacket is like HandlePacket except it does not perform the
  1136  // prerouting iptables hook or check for loopback traffic that originated from
  1137  // outside of the netstack (i.e. martian loopback packets).
  1138  func (e *endpoint) handleLocalPacket(pkt stack.PacketBufferPtr, canSkipRXChecksum bool) {
  1139  	stats := e.stats.ip
  1140  	stats.PacketsReceived.Increment()
  1141  
  1142  	pkt = pkt.CloneToInbound()
  1143  	defer pkt.DecRef()
  1144  	pkt.RXChecksumValidated = canSkipRXChecksum
  1145  
  1146  	hView, ok := e.protocol.parseAndValidate(pkt)
  1147  	if !ok {
  1148  		stats.MalformedPacketsReceived.Increment()
  1149  		return
  1150  	}
  1151  	defer hView.Release()
  1152  	h := header.IPv6(hView.AsSlice())
  1153  
  1154  	if !checkV4Mapped(h, stats) {
  1155  		return
  1156  	}
  1157  
  1158  	e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */)
  1159  }
  1160  
  1161  // forwardMulticastPacket validates a multicast pkt and attempts to forward it.
  1162  //
  1163  // This method should be invoked for incoming multicast packets using the
  1164  // endpoint that received the packet.
  1165  func (e *endpoint) forwardMulticastPacket(h header.IPv6, pkt stack.PacketBufferPtr) ip.ForwardingError {
  1166  	if err := validateAddressesForForwarding(h); err != nil {
  1167  		return err
  1168  	}
  1169  
  1170  	// Check extension headers for any errors.
  1171  	if err := e.processExtensionHeaders(h, pkt, true /* forwarding */); err != nil {
  1172  		return &ip.ErrParameterProblem{}
  1173  	}
  1174  
  1175  	routeKey := stack.UnicastSourceAndMulticastDestination{
  1176  		Source:      h.SourceAddress(),
  1177  		Destination: h.DestinationAddress(),
  1178  	}
  1179  
  1180  	// The pkt has been validated. Consequently, if a route is not found, then
  1181  	// the pkt can safely be queued.
  1182  	result, hasBufferSpace := e.protocol.multicastRouteTable.GetRouteOrInsertPending(routeKey, pkt)
  1183  
  1184  	if !hasBufferSpace {
  1185  		// Unable to queue the pkt. Silently drop it.
  1186  		return &ip.ErrNoMulticastPendingQueueBufferSpace{}
  1187  	}
  1188  
  1189  	switch result.GetRouteResultState {
  1190  	case multicast.InstalledRouteFound:
  1191  		// Attempt to forward the pkt using an existing route.
  1192  		return e.forwardValidatedMulticastPacket(pkt, result.InstalledRoute)
  1193  	case multicast.NoRouteFoundAndPendingInserted:
  1194  		e.emitMulticastEvent(func(disp stack.MulticastForwardingEventDispatcher) {
  1195  			disp.OnMissingRoute(stack.MulticastPacketContext{
  1196  				stack.UnicastSourceAndMulticastDestination{h.SourceAddress(), h.DestinationAddress()},
  1197  				e.nic.ID(),
  1198  			})
  1199  		})
  1200  	case multicast.PacketQueuedInPendingRoute:
  1201  	default:
  1202  		panic(fmt.Sprintf("unexpected GetRouteResultState: %s", result.GetRouteResultState))
  1203  	}
  1204  	return &ip.ErrHostUnreachable{}
  1205  }
  1206  
  1207  // forwardValidatedMulticastPacket attempts to forward the pkt using the
  1208  // provided installedRoute.
  1209  //
  1210  // This method should be invoked by the endpoint that received the pkt.
  1211  func (e *endpoint) forwardValidatedMulticastPacket(pkt stack.PacketBufferPtr, installedRoute *multicast.InstalledRoute) ip.ForwardingError {
  1212  	// Per RFC 1812 section 5.2.1.3,
  1213  	//
  1214  	//	 Based on the IP source and destination addresses found in the datagram
  1215  	//	 header, the router determines whether the datagram has been received
  1216  	//	 on the proper interface for forwarding.  If not, the datagram is
  1217  	//	 dropped silently.
  1218  	if e.nic.ID() != installedRoute.ExpectedInputInterface {
  1219  		h := header.IPv6(pkt.NetworkHeader().Slice())
  1220  		e.emitMulticastEvent(func(disp stack.MulticastForwardingEventDispatcher) {
  1221  			disp.OnUnexpectedInputInterface(stack.MulticastPacketContext{
  1222  				stack.UnicastSourceAndMulticastDestination{h.SourceAddress(), h.DestinationAddress()},
  1223  				e.nic.ID(),
  1224  			}, installedRoute.ExpectedInputInterface)
  1225  		})
  1226  		return &ip.ErrUnexpectedMulticastInputInterface{}
  1227  	}
  1228  
  1229  	for _, outgoingInterface := range installedRoute.OutgoingInterfaces {
  1230  		if err := e.forwardMulticastPacketForOutgoingInterface(pkt, outgoingInterface); err != nil {
  1231  			e.handleForwardingError(err)
  1232  			continue
  1233  		}
  1234  		// The pkt was successfully forwarded. Mark the route as used.
  1235  		installedRoute.SetLastUsedTimestamp(e.protocol.stack.Clock().NowMonotonic())
  1236  	}
  1237  	return nil
  1238  }
  1239  
  1240  // forwardMulticastPacketForOutgoingInterface attempts to forward the pkt out
  1241  // of the provided outgoing interface.
  1242  //
  1243  // This method should be invoked by the endpoint that received the pkt.
  1244  func (e *endpoint) forwardMulticastPacketForOutgoingInterface(pkt stack.PacketBufferPtr, outgoingInterface stack.MulticastRouteOutgoingInterface) ip.ForwardingError {
  1245  	h := header.IPv6(pkt.NetworkHeader().Slice())
  1246  
  1247  	// Per RFC 1812 section 5.2.1.3,
  1248  	//
  1249  	//	 A copy of the multicast datagram is forwarded out each outgoing
  1250  	//	 interface whose minimum TTL value is less than or equal to the TTL
  1251  	//	 value in the datagram header.
  1252  	//
  1253  	// Copying of the packet is deferred to forwardPacketWithRoute since unicast
  1254  	// and multicast both require a copy.
  1255  	if outgoingInterface.MinTTL > h.HopLimit() {
  1256  		return &ip.ErrTTLExceeded{}
  1257  	}
  1258  
  1259  	route := e.protocol.stack.NewRouteForMulticast(outgoingInterface.ID, h.DestinationAddress(), e.NetworkProtocolNumber())
  1260  
  1261  	if route == nil {
  1262  		// Failed to convert to a stack.Route. This likely means that the outgoing
  1263  		// endpoint no longer exists.
  1264  		return &ip.ErrHostUnreachable{}
  1265  	}
  1266  	defer route.Release()
  1267  	return e.forwardPacketWithRoute(route, pkt)
  1268  }
  1269  
  1270  // handleForwardingError processes the provided err and increments any relevant
  1271  // counters.
  1272  func (e *endpoint) handleForwardingError(err ip.ForwardingError) {
  1273  	stats := e.stats.ip
  1274  	switch err := err.(type) {
  1275  	case nil:
  1276  		return
  1277  	case *ip.ErrInitializingSourceAddress:
  1278  		stats.Forwarding.InitializingSource.Increment()
  1279  	case *ip.ErrLinkLocalSourceAddress:
  1280  		stats.Forwarding.LinkLocalSource.Increment()
  1281  	case *ip.ErrLinkLocalDestinationAddress:
  1282  		stats.Forwarding.LinkLocalDestination.Increment()
  1283  	case *ip.ErrTTLExceeded:
  1284  		stats.Forwarding.ExhaustedTTL.Increment()
  1285  	case *ip.ErrHostUnreachable:
  1286  		stats.Forwarding.Unrouteable.Increment()
  1287  	case *ip.ErrParameterProblem:
  1288  		stats.Forwarding.ExtensionHeaderProblem.Increment()
  1289  	case *ip.ErrMessageTooLong:
  1290  		stats.Forwarding.PacketTooBig.Increment()
  1291  	case *ip.ErrNoMulticastPendingQueueBufferSpace:
  1292  		stats.Forwarding.NoMulticastPendingQueueBufferSpace.Increment()
  1293  	case *ip.ErrUnexpectedMulticastInputInterface:
  1294  		stats.Forwarding.UnexpectedMulticastInputInterface.Increment()
  1295  	case *ip.ErrUnknownOutputEndpoint:
  1296  		stats.Forwarding.UnknownOutputEndpoint.Increment()
  1297  	case *ip.ErrOutgoingDeviceNoBufferSpace:
  1298  		stats.Forwarding.OutgoingDeviceNoBufferSpace.Increment()
  1299  	default:
  1300  		panic(fmt.Sprintf("unrecognized forwarding error: %s", err))
  1301  	}
  1302  	stats.Forwarding.Errors.Increment()
  1303  }
  1304  
  1305  func (e *endpoint) handleValidatedPacket(h header.IPv6, pkt stack.PacketBufferPtr, inNICName string) {
  1306  	pkt.NICID = e.nic.ID()
  1307  
  1308  	// Raw socket packets are delivered based solely on the transport protocol
  1309  	// number. We only require that the packet be valid IPv6.
  1310  	e.dispatcher.DeliverRawPacket(h.TransportProtocol(), pkt)
  1311  
  1312  	stats := e.stats.ip
  1313  	stats.ValidPacketsReceived.Increment()
  1314  
  1315  	srcAddr := h.SourceAddress()
  1316  	dstAddr := h.DestinationAddress()
  1317  
  1318  	// As per RFC 4291 section 2.7:
  1319  	//   Multicast addresses must not be used as source addresses in IPv6
  1320  	//   packets or appear in any Routing header.
  1321  	if header.IsV6MulticastAddress(srcAddr) {
  1322  		stats.InvalidSourceAddressesReceived.Increment()
  1323  		return
  1324  	}
  1325  
  1326  	if header.IsV6MulticastAddress(dstAddr) {
  1327  		// Handle all packets destined to a multicast address separately. Unlike
  1328  		// unicast, these packets can be both delivered locally and forwarded. See
  1329  		// RFC 1812 section 5.2.3 for details regarding the forwarding/local
  1330  		// delivery decision.
  1331  
  1332  		multicastForwading := e.MulticastForwarding() && e.protocol.multicastForwarding()
  1333  
  1334  		if multicastForwading {
  1335  			e.handleForwardingError(e.forwardMulticastPacket(h, pkt))
  1336  		}
  1337  
  1338  		if e.IsInGroup(dstAddr) {
  1339  			e.deliverPacketLocally(h, pkt, inNICName)
  1340  			return
  1341  		}
  1342  
  1343  		if !multicastForwading {
  1344  			// Only consider the destination address invalid if we didn't attempt to
  1345  			// forward the pkt and it was not delivered locally.
  1346  			stats.InvalidDestinationAddressesReceived.Increment()
  1347  		}
  1348  
  1349  		return
  1350  	}
  1351  
  1352  	// The destination address should be an address we own for us to receive the
  1353  	// packet. Otherwise, attempt to forward the packet.
  1354  	if addressEndpoint := e.AcquireAssignedAddress(dstAddr, e.nic.Promiscuous(), stack.CanBePrimaryEndpoint); addressEndpoint != nil {
  1355  		addressEndpoint.DecRef()
  1356  		e.deliverPacketLocally(h, pkt, inNICName)
  1357  	} else if e.Forwarding() {
  1358  		e.handleForwardingError(e.forwardUnicastPacket(pkt))
  1359  	} else {
  1360  		stats.InvalidDestinationAddressesReceived.Increment()
  1361  	}
  1362  }
  1363  
  1364  func (e *endpoint) deliverPacketLocally(h header.IPv6, pkt stack.PacketBufferPtr, inNICName string) {
  1365  	stats := e.stats.ip
  1366  
  1367  	// iptables filtering. All packets that reach here are intended for
  1368  	// this machine and need not be forwarded.
  1369  	if ok := e.protocol.stack.IPTables().CheckInput(pkt, inNICName); !ok {
  1370  		// iptables is telling us to drop the packet.
  1371  		stats.IPTablesInputDropped.Increment()
  1372  		return
  1373  	}
  1374  
  1375  	// Any returned error is only useful for terminating execution early, but
  1376  	// we have nothing left to do, so we can drop it.
  1377  	_ = e.processExtensionHeaders(h, pkt, false /* forwarding */)
  1378  }
  1379  
// processExtensionHeader advances it by one header and processes that header.
//
// The boolean result reports whether iteration should stop: it is true when
// the iterator is exhausted or fails, when processing of the header returns an
// error, and when forwarding encounters anything other than a Hop-by-Hop
// Options header. It is false when the caller should call again for the next
// header. The error result is non-nil only when the iterator or one of the
// per-header processors failed.
//
// pkt is passed by pointer and handed as such to the fragment header
// processor; every other processor receives the packet it currently points to.
// routerAlert and hasFragmentHeader carry state between successive calls for
// the same packet: hasFragmentHeader is set here once a fragment header is
// seen, and both are forwarded to the processors that consume them.
func (e *endpoint) processExtensionHeader(it *header.IPv6PayloadIterator, pkt *stack.PacketBufferPtr, h header.IPv6, routerAlert **header.IPv6RouterAlertOption, hasFragmentHeader *bool, forwarding bool) (bool, error) {
	stats := e.stats.ip
	dstAddr := h.DestinationAddress()
	// Keep track of the start of the previous header so we can report the
	// special case of a Hop by Hop at a location other than at the start.
	previousHeaderStart := it.HeaderOffset()
	extHdr, done, err := it.Next()
	if err != nil {
		stats.MalformedPacketsReceived.Increment()
		return true, err
	}
	if done {
		return true, nil
	}
	// Release the header returned by the iterator once this call is finished
	// with it, whichever path is taken below.
	defer extHdr.Release()

	// As per RFC 8200, section 4:
	//
	//   Extension headers (except for the Hop-by-Hop Options header) are
	//   not processed, inserted, or deleted by any node along a packet's
	//   delivery path until the packet reaches the node identified in the
	//   Destination Address field of the IPv6 header.
	//
	// Furthermore, as per RFC 8200 section 4.1, the Hop By Hop extension
	// header is restricted to appear first in the list of extension headers.
	//
	// Therefore, we can immediately return once we hit any header other
	// than the Hop-by-Hop header while forwarding a packet.
	if forwarding {
		if _, ok := extHdr.(header.IPv6HopByHopOptionsExtHdr); !ok {
			return true, nil
		}
	}

	// Dispatch on the concrete header type. Note that extHdr is shadowed by the
	// typed value inside each case.
	switch extHdr := extHdr.(type) {
	case header.IPv6HopByHopOptionsExtHdr:
		if err := e.processIPv6HopByHopOptionsExtHdr(&extHdr, it, *pkt, dstAddr, routerAlert, previousHeaderStart, forwarding); err != nil {
			return true, err
		}
	case header.IPv6RoutingExtHdr:
		if err := e.processIPv6RoutingExtHeader(&extHdr, it, *pkt); err != nil {
			return true, err
		}
	case header.IPv6FragmentExtHdr:
		// Record the fragment header before the atomic check so that later
		// headers see it in either case.
		*hasFragmentHeader = true
		if extHdr.IsAtomic() {
			// This fragment extension header indicates that this packet is an
			// atomic fragment. An atomic fragment is a fragment that contains
			// all the data required to reassemble a full packet. As per RFC 6946,
			// atomic fragments must not interfere with "normal" fragmented traffic
			// so we skip processing the fragment instead of feeding it through the
			// reassembly process below.
			return false, nil
		}

		// The packet pointer itself is passed here (not the pointee) so that the
		// fragment processor can swap in a different packet.
		if err := e.processFragmentExtHdr(&extHdr, it, pkt, h); err != nil {
			return true, err
		}
	case header.IPv6DestinationOptionsExtHdr:
		if err := e.processIPv6DestinationOptionsExtHdr(&extHdr, it, *pkt, dstAddr); err != nil {
			return true, err
		}
	case header.IPv6RawPayloadHeader:
		if err := e.processIPv6RawPayloadHeader(&extHdr, it, *pkt, *routerAlert, previousHeaderStart, *hasFragmentHeader); err != nil {
			return true, err
		}
	default:
		// Since the iterator returns IPv6RawPayloadHeader for unknown Extension
		// Header IDs this should never happen unless we missed a supported type
		// here.
		panic(fmt.Sprintf("unrecognized type from it.Next() = %T", extHdr))
	}
	return false, nil
}
  1454  
// processExtensionHeaders processes the extension headers in the given packet.
// Returns an error if the processing of a header failed or if the packet should
// be discarded.
//
// h is the packet's IPv6 header and supplies the identifier of the first
// header to parse. forwarding is passed through to processExtensionHeader,
// which uses it to stop at the first header that is not a Hop-by-Hop Options
// header.
func (e *endpoint) processExtensionHeaders(h header.IPv6, pkt stack.PacketBufferPtr, forwarding bool) error {
	// Build a buffer to parse the packet. We don't plan to modify anything here.
	// The buffer consists of:
	//	- Any IPv6 header bytes after the first 40 (i.e. extensions).
	//	- The transport header, if present.
	//	- Any other payload data.
	v := pkt.NetworkHeader().View()
	if v != nil {
		v.TrimFront(header.IPv6MinimumSize)
	}
	buf := buffer.MakeWithView(v)
	buf.Append(pkt.TransportHeader().View())
	dataBuf := pkt.Data().ToBuffer()
	buf.Merge(&dataBuf)
	it := header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(h.NextHeader()), buf)

	// Add a reference to pkt because fragment header processing can replace this
	// packet with a new one that has an extra reference. Adding a reference here
	// keeps the two in parity so they can both be DecRef'd the same way.
	pkt.IncRef()
	// The closure reads pkt when it runs, not when it is deferred, so it
	// releases whichever packet pkt refers to at return time.
	defer func() {
		pkt.DecRef()
		it.Release()
	}()

	var (
		// hasFragmentHeader and routerAlert are shared across iterations through
		// the pointers passed to processExtensionHeader.
		hasFragmentHeader bool
		routerAlert       *header.IPv6RouterAlertOption
	)
	// Process one header per iteration until processExtensionHeader reports
	// that it is done or fails.
	for {
		if done, err := e.processExtensionHeader(&it, &pkt, h, &routerAlert, &hasFragmentHeader, forwarding); err != nil || done {
			return err
		}
	}
}
  1493  
  1494  func (e *endpoint) processIPv6RawPayloadHeader(extHdr *header.IPv6RawPayloadHeader, it *header.IPv6PayloadIterator, pkt stack.PacketBufferPtr, routerAlert *header.IPv6RouterAlertOption, previousHeaderStart uint32, hasFragmentHeader bool) error {
  1495  	stats := e.stats.ip
  1496  	// If the last header in the payload isn't a known IPv6 extension header,
  1497  	// handle it as if it is transport layer data.Ã¥
  1498  
  1499  	// Calculate the number of octets parsed from data. We want to consume all
  1500  	// the data except the unparsed portion located at the end, whose size is
  1501  	// extHdr.Buf.Size().
  1502  	trim := pkt.Data().Size() - int(extHdr.Buf.Size())
  1503  
  1504  	// For unfragmented packets, extHdr still contains the transport header.
  1505  	// Consume that too.
  1506  	//
  1507  	// For reassembled fragments, pkt.TransportHeader is unset, so this is a
  1508  	// no-op and pkt.Data begins with the transport header.
  1509  	trim += len(pkt.TransportHeader().Slice())
  1510  
  1511  	if _, ok := pkt.Data().Consume(trim); !ok {
  1512  		stats.MalformedPacketsReceived.Increment()
  1513  		return fmt.Errorf("could not consume %d bytes", trim)
  1514  	}
  1515  
  1516  	proto := tcpip.TransportProtocolNumber(extHdr.Identifier)
  1517  	// If the packet was reassembled from a fragment, it will not have a
  1518  	// transport header set yet.
  1519  	if len(pkt.TransportHeader().Slice()) == 0 {
  1520  		e.protocol.parseTransport(pkt, proto)
  1521  	}
  1522  
  1523  	stats.PacketsDelivered.Increment()
  1524  	if proto == header.ICMPv6ProtocolNumber {
  1525  		e.handleICMP(pkt, hasFragmentHeader, routerAlert)
  1526  		return nil
  1527  	}
  1528  	switch res := e.dispatcher.DeliverTransportPacket(proto, pkt); res {
  1529  	case stack.TransportPacketHandled:
  1530  		return nil
  1531  	case stack.TransportPacketDestinationPortUnreachable:
  1532  		// As per RFC 4443 section 3.1:
  1533  		//   A destination node SHOULD originate a Destination Unreachable
  1534  		//   message with Code 4 in response to a packet for which the
  1535  		//   transport protocol (e.g., UDP) has no listener, if that transport
  1536  		//   protocol has no alternative means to inform the sender.
  1537  		_ = e.protocol.returnError(&icmpReasonPortUnreachable{}, pkt, true /* deliveredLocally */)
  1538  		return fmt.Errorf("destination port unreachable")
  1539  	case stack.TransportPacketProtocolUnreachable:
  1540  		// As per RFC 8200 section 4. (page 7):
  1541  		//   Extension headers are numbered from IANA IP Protocol Numbers
  1542  		//   [IANA-PN], the same values used for IPv4 and IPv6.  When
  1543  		//   processing a sequence of Next Header values in a packet, the
  1544  		//   first one that is not an extension header [IANA-EH] indicates
  1545  		//   that the next item in the packet is the corresponding upper-layer
  1546  		//   header.
  1547  		// With more related information on page 8:
  1548  		//   If, as a result of processing a header, the destination node is
  1549  		//   required to proceed to the next header but the Next Header value
  1550  		//   in the current header is unrecognized by the node, it should
  1551  		//   discard the packet and send an ICMP Parameter Problem message to
  1552  		//   the source of the packet, with an ICMP Code value of 1
  1553  		//   ("unrecognized Next Header type encountered") and the ICMP
  1554  		//   Pointer field containing the offset of the unrecognized value
  1555  		//   within the original packet.
  1556  		//
  1557  		// Which when taken together indicate that an unknown protocol should
  1558  		// be treated as an unrecognized next header value.
  1559  		// The location of the Next Header field is in a different place in
  1560  		// the initial IPv6 header than it is in the extension headers so
  1561  		// treat it specially.
  1562  		prevHdrIDOffset := uint32(header.IPv6NextHeaderOffset)
  1563  		if previousHeaderStart != 0 {
  1564  			prevHdrIDOffset = previousHeaderStart
  1565  		}
  1566  		_ = e.protocol.returnError(&icmpReasonParameterProblem{
  1567  			code:    header.ICMPv6UnknownHeader,
  1568  			pointer: prevHdrIDOffset,
  1569  		}, pkt, true /* deliveredLocally */)
  1570  		return fmt.Errorf("transport protocol unreachable")
  1571  	default:
  1572  		panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res))
  1573  	}
  1574  }
  1575  
  1576  func (e *endpoint) processIPv6RoutingExtHeader(extHdr *header.IPv6RoutingExtHdr, it *header.IPv6PayloadIterator, pkt stack.PacketBufferPtr) error {
  1577  	// As per RFC 8200 section 4.4, if a node encounters a routing header with
  1578  	// an unrecognized routing type value, with a non-zero Segments Left
  1579  	// value, the node must discard the packet and send an ICMP Parameter
  1580  	// Problem, Code 0 to the packet's Source Address, pointing to the
  1581  	// unrecognized Routing Type.
  1582  	//
  1583  	// If the Segments Left is 0, the node must ignore the Routing extension
  1584  	// header and process the next header in the packet.
  1585  	//
  1586  	// Note, the stack does not yet handle any type of routing extension
  1587  	// header, so we just make sure Segments Left is zero before processing
  1588  	// the next extension header.
  1589  	if extHdr.SegmentsLeft() == 0 {
  1590  		return nil
  1591  	}
  1592  	_ = e.protocol.returnError(&icmpReasonParameterProblem{
  1593  		code:    header.ICMPv6ErroneousHeader,
  1594  		pointer: it.ParseOffset(),
  1595  	}, pkt, true /* deliveredLocally */)
  1596  	return fmt.Errorf("found unrecognized routing type with non-zero segments left in header = %#v", extHdr)
  1597  }
  1598  
  1599  func (e *endpoint) processIPv6DestinationOptionsExtHdr(extHdr *header.IPv6DestinationOptionsExtHdr, it *header.IPv6PayloadIterator, pkt stack.PacketBufferPtr, dstAddr tcpip.Address) error {
  1600  	stats := e.stats.ip
  1601  	optsIt := extHdr.Iter()
  1602  	var uopt *header.IPv6UnknownExtHdrOption
  1603  	defer func() {
  1604  		if uopt != nil {
  1605  			uopt.Data.Release()
  1606  		}
  1607  	}()
  1608  
  1609  	for {
  1610  		opt, done, err := optsIt.Next()
  1611  		if err != nil {
  1612  			stats.MalformedPacketsReceived.Increment()
  1613  			return err
  1614  		}
  1615  		if uo, ok := opt.(*header.IPv6UnknownExtHdrOption); ok {
  1616  			uopt = uo
  1617  		}
  1618  		if done {
  1619  			break
  1620  		}
  1621  
  1622  		// We currently do not support any IPv6 Destination extension header
  1623  		// options.
  1624  		switch opt.UnknownAction() {
  1625  		case header.IPv6OptionUnknownActionSkip:
  1626  		case header.IPv6OptionUnknownActionDiscard:
  1627  			return fmt.Errorf("found unknown destination header option = %#v with discard action", opt)
  1628  		case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest:
  1629  			if header.IsV6MulticastAddress(dstAddr) {
  1630  				if uo, ok := opt.(*header.IPv6UnknownExtHdrOption); ok {
  1631  					uopt = uo
  1632  				}
  1633  				return fmt.Errorf("found unknown destination header option %#v with discard action", opt)
  1634  			}
  1635  			fallthrough
  1636  		case header.IPv6OptionUnknownActionDiscardSendICMP:
  1637  			// This case satisfies a requirement of RFC 8200 section 4.2
  1638  			// which states that an unknown option starting with bits [10] should:
  1639  			//
  1640  			//    discard the packet and, regardless of whether or not the
  1641  			//    packet's Destination Address was a multicast address, send an
  1642  			//    ICMP Parameter Problem, Code 2, message to the packet's
  1643  			//    Source Address, pointing to the unrecognized Option Type.
  1644  			//
  1645  			_ = e.protocol.returnError(&icmpReasonParameterProblem{
  1646  				code:               header.ICMPv6UnknownOption,
  1647  				pointer:            it.ParseOffset() + optsIt.OptionOffset(),
  1648  				respondToMulticast: true,
  1649  			}, pkt, true /* deliveredLocally */)
  1650  			return fmt.Errorf("found unknown destination header option %#v with discard action", opt)
  1651  		default:
  1652  			panic(fmt.Sprintf("unrecognized action for an unrecognized Destination extension header option = %#v", opt))
  1653  		}
  1654  		if uopt != nil {
  1655  			uopt.Data.Release()
  1656  			uopt = nil
  1657  		}
  1658  	}
  1659  	return nil
  1660  }
  1661  
  1662  func (e *endpoint) processIPv6HopByHopOptionsExtHdr(extHdr *header.IPv6HopByHopOptionsExtHdr, it *header.IPv6PayloadIterator, pkt stack.PacketBufferPtr, dstAddr tcpip.Address, routerAlert **header.IPv6RouterAlertOption, previousHeaderStart uint32, forwarding bool) error {
  1663  	stats := e.stats.ip
  1664  	// As per RFC 8200 section 4.1, the Hop By Hop extension header is
  1665  	// restricted to appear immediately after an IPv6 fixed header.
  1666  	if previousHeaderStart != 0 {
  1667  		_ = e.protocol.returnError(&icmpReasonParameterProblem{
  1668  			code:    header.ICMPv6UnknownHeader,
  1669  			pointer: previousHeaderStart,
  1670  		}, pkt, !forwarding /* deliveredLocally */)
  1671  		return fmt.Errorf("found Hop-by-Hop header = %#v with non-zero previous header offset = %d", extHdr, previousHeaderStart)
  1672  	}
  1673  
  1674  	optsIt := extHdr.Iter()
  1675  	var uopt *header.IPv6UnknownExtHdrOption
  1676  	defer func() {
  1677  		if uopt != nil {
  1678  			uopt.Data.Release()
  1679  		}
  1680  	}()
  1681  
  1682  	for {
  1683  		opt, done, err := optsIt.Next()
  1684  		if err != nil {
  1685  			stats.MalformedPacketsReceived.Increment()
  1686  			return err
  1687  		}
  1688  		if uo, ok := opt.(*header.IPv6UnknownExtHdrOption); ok {
  1689  			uopt = uo
  1690  		}
  1691  		if done {
  1692  			break
  1693  		}
  1694  
  1695  		switch opt := opt.(type) {
  1696  		case *header.IPv6RouterAlertOption:
  1697  			if *routerAlert != nil {
  1698  				// As per RFC 2711 section 3, there should be at most one Router
  1699  				// Alert option per packet.
  1700  				//
  1701  				//    There MUST only be one option of this type, regardless of
  1702  				//    value, per Hop-by-Hop header.
  1703  				stats.MalformedPacketsReceived.Increment()
  1704  				return fmt.Errorf("found multiple Router Alert options (%#v, %#v)", opt, *routerAlert)
  1705  			}
  1706  			*routerAlert = opt
  1707  			stats.OptionRouterAlertReceived.Increment()
  1708  		default:
  1709  			switch opt.UnknownAction() {
  1710  			case header.IPv6OptionUnknownActionSkip:
  1711  			case header.IPv6OptionUnknownActionDiscard:
  1712  				return fmt.Errorf("found unknown Hop-by-Hop header option = %#v with discard action", opt)
  1713  			case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest:
  1714  				if header.IsV6MulticastAddress(dstAddr) {
  1715  					return fmt.Errorf("found unknown hop-by-hop header option = %#v with discard action", opt)
  1716  				}
  1717  				fallthrough
  1718  			case header.IPv6OptionUnknownActionDiscardSendICMP:
  1719  				// This case satisfies a requirement of RFC 8200 section 4.2 which
  1720  				// states that an unknown option starting with bits [10] should:
  1721  				//
  1722  				//    discard the packet and, regardless of whether or not the
  1723  				//    packet's Destination Address was a multicast address, send an
  1724  				//    ICMP Parameter Problem, Code 2, message to the packet's
  1725  				//    Source Address, pointing to the unrecognized Option Type.
  1726  				_ = e.protocol.returnError(&icmpReasonParameterProblem{
  1727  					code:               header.ICMPv6UnknownOption,
  1728  					pointer:            it.ParseOffset() + optsIt.OptionOffset(),
  1729  					respondToMulticast: true,
  1730  				}, pkt, !forwarding /* deliveredLocally */)
  1731  				return fmt.Errorf("found unknown hop-by-hop header option = %#v with discard action", opt)
  1732  			default:
  1733  				panic(fmt.Sprintf("unrecognized action for an unrecognized Hop By Hop extension header option = %#v", opt))
  1734  			}
  1735  		}
  1736  		if uopt != nil {
  1737  			uopt.Data.Release()
  1738  			uopt = nil
  1739  		}
  1740  	}
  1741  	return nil
  1742  }
  1743  
  1744  func (e *endpoint) processFragmentExtHdr(extHdr *header.IPv6FragmentExtHdr, it *header.IPv6PayloadIterator, pkt *stack.PacketBufferPtr, h header.IPv6) error {
  1745  	stats := e.stats.ip
  1746  	fragmentFieldOffset := it.ParseOffset()
  1747  
  1748  	// Don't consume the iterator if we have the first fragment because we
  1749  	// will use it to validate that the first fragment holds the upper layer
  1750  	// header.
  1751  	rawPayload := it.AsRawHeader(extHdr.FragmentOffset() != 0 /* consume */)
  1752  	defer rawPayload.Release()
  1753  
  1754  	if extHdr.FragmentOffset() == 0 {
  1755  		// Check that the iterator ends with a raw payload as the first fragment
  1756  		// should include all headers up to and including any upper layer
  1757  		// headers, as per RFC 8200 section 4.5; only upper layer data
  1758  		// (non-headers) should follow the fragment extension header.
  1759  		var lastHdr header.IPv6PayloadHeader
  1760  
  1761  		for {
  1762  			it, done, err := it.Next()
  1763  			if err != nil {
  1764  				stats.MalformedPacketsReceived.Increment()
  1765  				stats.MalformedFragmentsReceived.Increment()
  1766  				return err
  1767  			}
  1768  			if done {
  1769  				break
  1770  			}
  1771  			it.Release()
  1772  
  1773  			lastHdr = it
  1774  		}
  1775  
  1776  		// If the last header is a raw header, then the last portion of the IPv6
  1777  		// payload is not a known IPv6 extension header. Note, this does not
  1778  		// mean that the last portion is an upper layer header or not an
  1779  		// extension header because:
  1780  		//  1) we do not yet support all extension headers
  1781  		//  2) we do not validate the upper layer header before reassembling.
  1782  		//
  1783  		// This check makes sure that a known IPv6 extension header is not
  1784  		// present after the Fragment extension header in a non-initial
  1785  		// fragment.
  1786  		//
  1787  		// TODO(#2196): Support IPv6 Authentication and Encapsulated
  1788  		// Security Payload extension headers.
  1789  		// TODO(#2333): Validate that the upper layer header is valid.
  1790  		switch lastHdr.(type) {
  1791  		case header.IPv6RawPayloadHeader:
  1792  		default:
  1793  			stats.MalformedPacketsReceived.Increment()
  1794  			stats.MalformedFragmentsReceived.Increment()
  1795  			return fmt.Errorf("known extension header = %#v present after fragment header in a non-initial fragment", lastHdr)
  1796  		}
  1797  	}
  1798  
  1799  	fragmentPayloadLen := rawPayload.Buf.Size()
  1800  	if fragmentPayloadLen == 0 {
  1801  		// Drop the packet as it's marked as a fragment but has no payload.
  1802  		stats.MalformedPacketsReceived.Increment()
  1803  		stats.MalformedFragmentsReceived.Increment()
  1804  		return fmt.Errorf("fragment has no payload")
  1805  	}
  1806  
  1807  	// As per RFC 2460 Section 4.5:
  1808  	//
  1809  	//    If the length of a fragment, as derived from the fragment packet's
  1810  	//    Payload Length field, is not a multiple of 8 octets and the M flag
  1811  	//    of that fragment is 1, then that fragment must be discarded and an
  1812  	//    ICMP Parameter Problem, Code 0, message should be sent to the source
  1813  	//    of the fragment, pointing to the Payload Length field of the
  1814  	//    fragment packet.
  1815  	if extHdr.More() && fragmentPayloadLen%header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit != 0 {
  1816  		stats.MalformedPacketsReceived.Increment()
  1817  		stats.MalformedFragmentsReceived.Increment()
  1818  		_ = e.protocol.returnError(&icmpReasonParameterProblem{
  1819  			code:    header.ICMPv6ErroneousHeader,
  1820  			pointer: header.IPv6PayloadLenOffset,
  1821  		}, *pkt, true /* deliveredLocally */)
  1822  		return fmt.Errorf("found fragment length = %d that is not a multiple of 8 octets", fragmentPayloadLen)
  1823  	}
  1824  
  1825  	// The packet is a fragment, let's try to reassemble it.
  1826  	start := extHdr.FragmentOffset() * header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit
  1827  
  1828  	// As per RFC 2460 Section 4.5:
  1829  	//
  1830  	//    If the length and offset of a fragment are such that the Payload
  1831  	//    Length of the packet reassembled from that fragment would exceed
  1832  	//    65,535 octets, then that fragment must be discarded and an ICMP
  1833  	//    Parameter Problem, Code 0, message should be sent to the source of
  1834  	//    the fragment, pointing to the Fragment Offset field of the fragment
  1835  	//    packet.
  1836  	lengthAfterReassembly := int(start) + int(fragmentPayloadLen)
  1837  	if lengthAfterReassembly > header.IPv6MaximumPayloadSize {
  1838  		stats.MalformedPacketsReceived.Increment()
  1839  		stats.MalformedFragmentsReceived.Increment()
  1840  		_ = e.protocol.returnError(&icmpReasonParameterProblem{
  1841  			code:    header.ICMPv6ErroneousHeader,
  1842  			pointer: fragmentFieldOffset,
  1843  		}, *pkt, true /* deliveredLocally */)
  1844  		return fmt.Errorf("determined that reassembled packet length = %d would exceed allowed length = %d", lengthAfterReassembly, header.IPv6MaximumPayloadSize)
  1845  	}
  1846  
  1847  	// Note that pkt doesn't have its transport header set after reassembly,
  1848  	// and won't until DeliverNetworkPacket sets it.
  1849  	resPkt, proto, ready, err := e.protocol.fragmentation.Process(
  1850  		// IPv6 ignores the Protocol field since the ID only needs to be unique
  1851  		// across source-destination pairs, as per RFC 8200 section 4.5.
  1852  		fragmentation.FragmentID{
  1853  			Source:      h.SourceAddress(),
  1854  			Destination: h.DestinationAddress(),
  1855  			ID:          extHdr.ID(),
  1856  		},
  1857  		start,
  1858  		start+uint16(fragmentPayloadLen)-1,
  1859  		extHdr.More(),
  1860  		uint8(rawPayload.Identifier),
  1861  		*pkt,
  1862  	)
  1863  	if err != nil {
  1864  		stats.MalformedPacketsReceived.Increment()
  1865  		stats.MalformedFragmentsReceived.Increment()
  1866  		return err
  1867  	}
  1868  
  1869  	if ready {
  1870  		// We create a new iterator with the reassembled packet because we could
  1871  		// have more extension headers in the reassembled payload, as per RFC
  1872  		// 8200 section 4.5. We also use the NextHeader value from the first
  1873  		// fragment.
  1874  		it.Release()
  1875  		*it = header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(proto), resPkt.Data().ToBuffer())
  1876  		(*pkt).DecRef()
  1877  		*pkt = resPkt
  1878  	}
  1879  	return nil
  1880  }
  1881  
  1882  // Close cleans up resources associated with the endpoint.
  1883  func (e *endpoint) Close() {
  1884  	e.mu.Lock()
  1885  	e.disableLocked()
  1886  	e.mu.addressableEndpointState.Cleanup()
  1887  	e.mu.Unlock()
  1888  
  1889  	e.protocol.forgetEndpoint(e.nic.ID())
  1890  }
  1891  
  1892  // NetworkProtocolNumber implements stack.NetworkEndpoint.
  1893  func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
  1894  	return e.protocol.Number()
  1895  }
  1896  
  1897  // AddAndAcquirePermanentAddress implements stack.AddressableEndpoint.
  1898  func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, properties stack.AddressProperties) (stack.AddressEndpoint, tcpip.Error) {
  1899  	// TODO(b/169350103): add checks here after making sure we no longer receive
  1900  	// an empty address.
  1901  	e.mu.Lock()
  1902  	defer e.mu.Unlock()
  1903  
  1904  	// The dance of registering the dispatcher after adding the address makes it
  1905  	// so that the tentative state is skipped if DAD is disabled.
  1906  	addrDisp := properties.Disp
  1907  	properties.Disp = nil
  1908  	addressEndpoint, err := e.addAndAcquirePermanentAddressLocked(addr, properties)
  1909  	if addrDisp != nil && err == nil {
  1910  		addressEndpoint.RegisterDispatcher(addrDisp)
  1911  	}
  1912  	return addressEndpoint, err
  1913  }
  1914  
  1915  // addAndAcquirePermanentAddressLocked is like AddAndAcquirePermanentAddress but
  1916  // with locking requirements.
  1917  //
  1918  // addAndAcquirePermanentAddressLocked also joins the passed address's
  1919  // solicited-node multicast group and start duplicate address detection.
  1920  //
  1921  // Precondition: e.mu must be write locked.
  1922  func (e *endpoint) addAndAcquirePermanentAddressLocked(addr tcpip.AddressWithPrefix, properties stack.AddressProperties) (stack.AddressEndpoint, tcpip.Error) {
  1923  	addressEndpoint, err := e.mu.addressableEndpointState.AddAndAcquireAddress(addr, properties, stack.PermanentTentative)
  1924  	if err != nil {
  1925  		return nil, err
  1926  	}
  1927  
  1928  	if !header.IsV6UnicastAddress(addr.Address) {
  1929  		return addressEndpoint, nil
  1930  	}
  1931  
  1932  	if e.Enabled() {
  1933  		if err := e.mu.ndp.startDuplicateAddressDetection(addr.Address, addressEndpoint); err != nil {
  1934  			return nil, err
  1935  		}
  1936  	}
  1937  
  1938  	snmc := header.SolicitedNodeAddr(addr.Address)
  1939  	if err := e.joinGroupLocked(snmc); err != nil {
  1940  		// joinGroupLocked only returns an error if the group address is not a valid
  1941  		// IPv6 multicast address.
  1942  		panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", snmc, err))
  1943  	}
  1944  
  1945  	return addressEndpoint, nil
  1946  }
  1947  
  1948  // RemovePermanentAddress implements stack.AddressableEndpoint.
  1949  func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) tcpip.Error {
  1950  	e.mu.Lock()
  1951  	defer e.mu.Unlock()
  1952  
  1953  	addressEndpoint := e.getAddressRLocked(addr)
  1954  	if addressEndpoint == nil || !addressEndpoint.GetKind().IsPermanent() {
  1955  		return &tcpip.ErrBadLocalAddress{}
  1956  	}
  1957  
  1958  	return e.removePermanentEndpointLocked(addressEndpoint, true /* allowSLAACInvalidation */, stack.AddressRemovalManualAction, &stack.DADAborted{})
  1959  }
  1960  
  1961  // removePermanentEndpointLocked is like removePermanentAddressLocked except
  1962  // it works with a stack.AddressEndpoint.
  1963  //
  1964  // Precondition: e.mu must be write locked.
  1965  func (e *endpoint) removePermanentEndpointLocked(addressEndpoint stack.AddressEndpoint, allowSLAACInvalidation bool, reason stack.AddressRemovalReason, dadResult stack.DADResult) tcpip.Error {
  1966  	addr := addressEndpoint.AddressWithPrefix()
  1967  	// If we are removing an address generated via SLAAC, cleanup
  1968  	// its SLAAC resources and notify the integrator.
  1969  	if addressEndpoint.ConfigType() == stack.AddressConfigSlaac {
  1970  		if addressEndpoint.Temporary() {
  1971  			e.mu.ndp.cleanupTempSLAACAddrResourcesAndNotify(addr)
  1972  		} else {
  1973  			e.mu.ndp.cleanupSLAACAddrResourcesAndNotify(addr, allowSLAACInvalidation)
  1974  		}
  1975  	}
  1976  
  1977  	return e.removePermanentEndpointInnerLocked(addressEndpoint, reason, dadResult)
  1978  }
  1979  
  1980  // removePermanentEndpointInnerLocked is like removePermanentEndpointLocked
  1981  // except it does not cleanup SLAAC address state.
  1982  //
  1983  // Precondition: e.mu must be write locked.
  1984  func (e *endpoint) removePermanentEndpointInnerLocked(addressEndpoint stack.AddressEndpoint, reason stack.AddressRemovalReason, dadResult stack.DADResult) tcpip.Error {
  1985  	addr := addressEndpoint.AddressWithPrefix()
  1986  	e.mu.ndp.stopDuplicateAddressDetection(addr.Address, dadResult)
  1987  
  1988  	if err := e.mu.addressableEndpointState.RemovePermanentEndpoint(addressEndpoint, reason); err != nil {
  1989  		return err
  1990  	}
  1991  
  1992  	snmc := header.SolicitedNodeAddr(addr.Address)
  1993  	err := e.leaveGroupLocked(snmc)
  1994  	// The endpoint may have already left the multicast group.
  1995  	if _, ok := err.(*tcpip.ErrBadLocalAddress); ok {
  1996  		err = nil
  1997  	}
  1998  	return err
  1999  }
  2000  
  2001  // hasPermanentAddressLocked returns true if the endpoint has a permanent
  2002  // address equal to the passed address.
  2003  //
  2004  // Precondition: e.mu must be read or write locked.
  2005  func (e *endpoint) hasPermanentAddressRLocked(addr tcpip.Address) bool {
  2006  	addressEndpoint := e.getAddressRLocked(addr)
  2007  	if addressEndpoint == nil {
  2008  		return false
  2009  	}
  2010  	return addressEndpoint.GetKind().IsPermanent()
  2011  }
  2012  
  2013  // getAddressRLocked returns the endpoint for the passed address.
  2014  //
  2015  // Precondition: e.mu must be read or write locked.
  2016  func (e *endpoint) getAddressRLocked(localAddr tcpip.Address) stack.AddressEndpoint {
  2017  	return e.mu.addressableEndpointState.GetAddress(localAddr)
  2018  }
  2019  
  2020  // SetDeprecated implements stack.AddressableEndpoint.
  2021  func (e *endpoint) SetDeprecated(addr tcpip.Address, deprecated bool) tcpip.Error {
  2022  	e.mu.RLock()
  2023  	defer e.mu.RUnlock()
  2024  	return e.mu.addressableEndpointState.SetDeprecated(addr, deprecated)
  2025  }
  2026  
  2027  // SetLifetimes implements stack.AddressableEndpoint.
  2028  func (e *endpoint) SetLifetimes(addr tcpip.Address, lifetimes stack.AddressLifetimes) tcpip.Error {
  2029  	e.mu.RLock()
  2030  	defer e.mu.RUnlock()
  2031  	return e.mu.addressableEndpointState.SetLifetimes(addr, lifetimes)
  2032  }
  2033  
  2034  // MainAddress implements stack.AddressableEndpoint.
  2035  func (e *endpoint) MainAddress() tcpip.AddressWithPrefix {
  2036  	e.mu.RLock()
  2037  	defer e.mu.RUnlock()
  2038  	return e.mu.addressableEndpointState.MainAddress()
  2039  }
  2040  
  2041  // AcquireAssignedAddress implements stack.AddressableEndpoint.
  2042  func (e *endpoint) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint {
  2043  	e.mu.RLock()
  2044  	defer e.mu.RUnlock()
  2045  	return e.acquireAddressOrCreateTempLocked(localAddr, allowTemp, tempPEB)
  2046  }
  2047  
  2048  // acquireAddressOrCreateTempLocked is like AcquireAssignedAddress but with
  2049  // locking requirements.
  2050  //
  2051  // Precondition: e.mu must be write locked.
  2052  func (e *endpoint) acquireAddressOrCreateTempLocked(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint {
  2053  	return e.mu.addressableEndpointState.AcquireAssignedAddress(localAddr, allowTemp, tempPEB)
  2054  }
  2055  
  2056  // AcquireOutgoingPrimaryAddress implements stack.AddressableEndpoint.
  2057  func (e *endpoint) AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint {
  2058  	e.mu.RLock()
  2059  	defer e.mu.RUnlock()
  2060  	return e.acquireOutgoingPrimaryAddressRLocked(remoteAddr, allowExpired)
  2061  }
  2062  
  2063  // getLinkLocalAddressRLocked returns a link-local address from the primary list
  2064  // of addresses, if one is available.
  2065  //
  2066  // See stack.PrimaryEndpointBehavior for more details about the primary list.
  2067  //
  2068  // Precondition: e.mu must be read locked.
  2069  func (e *endpoint) getLinkLocalAddressRLocked() tcpip.Address {
  2070  	var linkLocalAddr tcpip.Address
  2071  	e.mu.addressableEndpointState.ForEachPrimaryEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
  2072  		if addressEndpoint.IsAssigned(false /* allowExpired */) {
  2073  			if addr := addressEndpoint.AddressWithPrefix().Address; header.IsV6LinkLocalUnicastAddress(addr) {
  2074  				linkLocalAddr = addr
  2075  				return false
  2076  			}
  2077  		}
  2078  		return true
  2079  	})
  2080  	return linkLocalAddr
  2081  }
  2082  
  2083  // acquireOutgoingPrimaryAddressRLocked is like AcquireOutgoingPrimaryAddress
  2084  // but with locking requirements.
  2085  //
  2086  // Precondition: e.mu must be read locked.
  2087  func (e *endpoint) acquireOutgoingPrimaryAddressRLocked(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint {
  2088  	// addrCandidate is a candidate for Source Address Selection, as per
  2089  	// RFC 6724 section 5.
  2090  	type addrCandidate struct {
  2091  		addressEndpoint stack.AddressEndpoint
  2092  		addr            tcpip.Address
  2093  		scope           header.IPv6AddressScope
  2094  
  2095  		label          uint8
  2096  		matchingPrefix uint8
  2097  	}
  2098  
  2099  	if remoteAddr.BitLen() == 0 {
  2100  		return e.mu.addressableEndpointState.AcquireOutgoingPrimaryAddress(remoteAddr, allowExpired)
  2101  	}
  2102  
  2103  	// Create a candidate set of available addresses we can potentially use as a
  2104  	// source address.
  2105  	var cs []addrCandidate
  2106  	e.mu.addressableEndpointState.ForEachPrimaryEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
  2107  		// If r is not valid for outgoing connections, it is not a valid endpoint.
  2108  		if !addressEndpoint.IsAssigned(allowExpired) {
  2109  			return true
  2110  		}
  2111  
  2112  		addr := addressEndpoint.AddressWithPrefix().Address
  2113  		scope, err := header.ScopeForIPv6Address(addr)
  2114  		if err != nil {
  2115  			// Should never happen as we got r from the primary IPv6 endpoint list and
  2116  			// ScopeForIPv6Address only returns an error if addr is not an IPv6
  2117  			// address.
  2118  			panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", addr, err))
  2119  		}
  2120  
  2121  		cs = append(cs, addrCandidate{
  2122  			addressEndpoint: addressEndpoint,
  2123  			addr:            addr,
  2124  			scope:           scope,
  2125  			label:           getLabel(addr),
  2126  			matchingPrefix:  remoteAddr.MatchingPrefix(addr),
  2127  		})
  2128  
  2129  		return true
  2130  	})
  2131  
  2132  	remoteScope, err := header.ScopeForIPv6Address(remoteAddr)
  2133  	if err != nil {
  2134  		// primaryIPv6Endpoint should never be called with an invalid IPv6 address.
  2135  		panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", remoteAddr, err))
  2136  	}
  2137  
  2138  	remoteLabel := getLabel(remoteAddr)
  2139  
  2140  	// Sort the addresses as per RFC 6724 section 5 rules 1-3.
  2141  	//
  2142  	// TODO(b/146021396): Implement rules 4, 5 of RFC 6724 section 5.
  2143  	sort.Slice(cs, func(i, j int) bool {
  2144  		sa := cs[i]
  2145  		sb := cs[j]
  2146  
  2147  		// Prefer same address as per RFC 6724 section 5 rule 1.
  2148  		if sa.addr == remoteAddr {
  2149  			return true
  2150  		}
  2151  		if sb.addr == remoteAddr {
  2152  			return false
  2153  		}
  2154  
  2155  		// Prefer appropriate scope as per RFC 6724 section 5 rule 2.
  2156  		if sa.scope < sb.scope {
  2157  			return sa.scope >= remoteScope
  2158  		} else if sb.scope < sa.scope {
  2159  			return sb.scope < remoteScope
  2160  		}
  2161  
  2162  		// Avoid deprecated addresses as per RFC 6724 section 5 rule 3.
  2163  		if saDep, sbDep := sa.addressEndpoint.Deprecated(), sb.addressEndpoint.Deprecated(); saDep != sbDep {
  2164  			// If sa is not deprecated, it is preferred over sb.
  2165  			return sbDep
  2166  		}
  2167  
  2168  		// Prefer matching label as per RFC 6724 section 5 rule 6.
  2169  		if sa, sb := sa.label == remoteLabel, sb.label == remoteLabel; sa != sb {
  2170  			if sa {
  2171  				return true
  2172  			}
  2173  			if sb {
  2174  				return false
  2175  			}
  2176  		}
  2177  
  2178  		// Prefer temporary addresses as per RFC 6724 section 5 rule 7.
  2179  		if saTemp, sbTemp := sa.addressEndpoint.Temporary(), sb.addressEndpoint.Temporary(); saTemp != sbTemp {
  2180  			return saTemp
  2181  		}
  2182  
  2183  		// Use longest matching prefix as per RFC 6724 section 5 rule 8.
  2184  		if sa.matchingPrefix > sb.matchingPrefix {
  2185  			return true
  2186  		}
  2187  		if sb.matchingPrefix > sa.matchingPrefix {
  2188  			return false
  2189  		}
  2190  
  2191  		// sa and sb are equal, return the endpoint that is closest to the front of
  2192  		// the primary endpoint list.
  2193  		return i < j
  2194  	})
  2195  
  2196  	// Return the most preferred address that can have its reference count
  2197  	// incremented.
  2198  	for _, c := range cs {
  2199  		if c.addressEndpoint.IncRef() {
  2200  			return c.addressEndpoint
  2201  		}
  2202  	}
  2203  
  2204  	return nil
  2205  }
  2206  
  2207  // PrimaryAddresses implements stack.AddressableEndpoint.
  2208  func (e *endpoint) PrimaryAddresses() []tcpip.AddressWithPrefix {
  2209  	e.mu.RLock()
  2210  	defer e.mu.RUnlock()
  2211  	return e.mu.addressableEndpointState.PrimaryAddresses()
  2212  }
  2213  
  2214  // PermanentAddresses implements stack.AddressableEndpoint.
  2215  func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix {
  2216  	e.mu.RLock()
  2217  	defer e.mu.RUnlock()
  2218  	return e.mu.addressableEndpointState.PermanentAddresses()
  2219  }
  2220  
  2221  // JoinGroup implements stack.GroupAddressableEndpoint.
  2222  func (e *endpoint) JoinGroup(addr tcpip.Address) tcpip.Error {
  2223  	e.mu.Lock()
  2224  	defer e.mu.Unlock()
  2225  	return e.joinGroupLocked(addr)
  2226  }
  2227  
  2228  // joinGroupLocked is like JoinGroup but with locking requirements.
  2229  //
  2230  // Precondition: e.mu must be locked.
  2231  func (e *endpoint) joinGroupLocked(addr tcpip.Address) tcpip.Error {
  2232  	if !header.IsV6MulticastAddress(addr) {
  2233  		return &tcpip.ErrBadAddress{}
  2234  	}
  2235  
  2236  	e.mu.mld.joinGroup(addr)
  2237  	return nil
  2238  }
  2239  
  2240  // LeaveGroup implements stack.GroupAddressableEndpoint.
  2241  func (e *endpoint) LeaveGroup(addr tcpip.Address) tcpip.Error {
  2242  	e.mu.Lock()
  2243  	defer e.mu.Unlock()
  2244  	return e.leaveGroupLocked(addr)
  2245  }
  2246  
  2247  // leaveGroupLocked is like LeaveGroup but with locking requirements.
  2248  //
  2249  // Precondition: e.mu must be locked.
  2250  func (e *endpoint) leaveGroupLocked(addr tcpip.Address) tcpip.Error {
  2251  	return e.mu.mld.leaveGroup(addr)
  2252  }
  2253  
  2254  // IsInGroup implements stack.GroupAddressableEndpoint.
  2255  func (e *endpoint) IsInGroup(addr tcpip.Address) bool {
  2256  	e.mu.RLock()
  2257  	defer e.mu.RUnlock()
  2258  	return e.mu.mld.isInGroup(addr)
  2259  }
  2260  
  2261  // Stats implements stack.NetworkEndpoint.
  2262  func (e *endpoint) Stats() stack.NetworkEndpointStats {
  2263  	return &e.stats.localStats
  2264  }
  2265  
  2266  var _ stack.NetworkProtocol = (*protocol)(nil)
  2267  var _ stack.MulticastForwardingNetworkProtocol = (*protocol)(nil)
  2268  var _ stack.RejectIPv6WithHandler = (*protocol)(nil)
  2269  var _ fragmentation.TimeoutHandler = (*protocol)(nil)
  2270  
// protocol holds the IPv6 state that is shared by every endpoint created
// through NewEndpoint.
type protocol struct {
	// stack is the stack the protocol was created for and options are the
	// options it was created with (see NewProtocolWithOptions).
	stack   *stack.Stack
	options Options

	// mu protects the fields it contains.
	mu struct {
		sync.RWMutex

		// eps is keyed by NICID to allow protocol methods to retrieve an endpoint
		// when handling a packet, by looking at which NIC handled the packet.
		eps map[tcpip.NICID]*endpoint

		// ICMP types for which the stack's global rate limiting must apply.
		icmpRateLimitedTypes map[header.ICMPv6Type]struct{}

		// multicastForwardingDisp is the multicast forwarding event dispatcher that
		// an integrator can provide to receive multicast forwarding events. Note
		// that multicast packets will only be forwarded if this is non-nil.
		multicastForwardingDisp stack.MulticastForwardingEventDispatcher
	}

	// ids and hashIV are filled with random values when the protocol is
	// created.
	//
	// NOTE(review): presumably used to derive fragment identification values
	// (hashIV is the kind of value hashRoute mixes in) - confirm against the
	// users outside this section.
	ids    []atomicbitops.Uint32
	hashIV uint32

	// defaultTTL is the current default TTL for the protocol. Only the
	// uint8 portion of it is meaningful.
	defaultTTL atomicbitops.Uint32

	// fragmentation is created in NewProtocolWithOptions with ReassembleTimeout
	// and released in Close.
	fragmentation   *fragmentation.Fragmentation
	icmpRateLimiter *stack.ICMPRateLimiter

	// multicastRouteTable stores the installed multicast routes (see
	// AddMulticastRoute and RemoveMulticastRoute).
	multicastRouteTable multicast.RouteTable
}
  2303  
  2304  // Number returns the ipv6 protocol number.
  2305  func (p *protocol) Number() tcpip.NetworkProtocolNumber {
  2306  	return ProtocolNumber
  2307  }
  2308  
  2309  // MinimumPacketSize returns the minimum valid ipv6 packet size.
  2310  func (p *protocol) MinimumPacketSize() int {
  2311  	return header.IPv6MinimumSize
  2312  }
  2313  
  2314  // ParseAddresses implements stack.NetworkProtocol.
  2315  func (*protocol) ParseAddresses(b []byte) (src, dst tcpip.Address) {
  2316  	h := header.IPv6(b)
  2317  	return h.SourceAddress(), h.DestinationAddress()
  2318  }
  2319  
  2320  // NewEndpoint creates a new ipv6 endpoint.
  2321  func (p *protocol) NewEndpoint(nic stack.NetworkInterface, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint {
  2322  	e := &endpoint{
  2323  		nic:        nic,
  2324  		dispatcher: dispatcher,
  2325  		protocol:   p,
  2326  	}
  2327  
  2328  	// NDP options must be 8 octet aligned and the first 2 bytes are used for
  2329  	// the type and length fields leaving 6 octets as the minimum size for a
  2330  	// nonce option without padding.
  2331  	const nonceSize = 6
  2332  
  2333  	// As per RFC 7527 section 4.1,
  2334  	//
  2335  	//   If any probe is looped back within RetransTimer milliseconds after
  2336  	//   having sent DupAddrDetectTransmits NS(DAD) messages, the interface
  2337  	//   continues with another MAX_MULTICAST_SOLICIT number of NS(DAD)
  2338  	//   messages transmitted RetransTimer milliseconds apart.
  2339  	//
  2340  	// Value taken from RFC 4861 section 10.
  2341  	const maxMulticastSolicit = 3
  2342  	dadOptions := ip.DADOptions{
  2343  		Clock:              p.stack.Clock(),
  2344  		SecureRNG:          p.stack.SecureRNG(),
  2345  		NonceSize:          nonceSize,
  2346  		ExtendDADTransmits: maxMulticastSolicit,
  2347  		Protocol:           &e.mu.ndp,
  2348  		NICID:              nic.ID(),
  2349  	}
  2350  
  2351  	e.mu.Lock()
  2352  	e.mu.addressableEndpointState.Init(e, stack.AddressableEndpointStateOptions{HiddenWhileDisabled: true})
  2353  	e.mu.ndp.init(e, dadOptions)
  2354  	e.mu.mld.init(e)
  2355  	e.dad.mu.Lock()
  2356  	e.dad.mu.dad.Init(&e.dad.mu, p.options.DADConfigs, dadOptions)
  2357  	e.dad.mu.Unlock()
  2358  	e.mu.Unlock()
  2359  
  2360  	stackStats := p.stack.Stats()
  2361  	tcpip.InitStatCounters(reflect.ValueOf(&e.stats.localStats).Elem())
  2362  	e.stats.ip.Init(&e.stats.localStats.IP, &stackStats.IP)
  2363  	e.stats.icmp.init(&e.stats.localStats.ICMP, &stackStats.ICMP.V6)
  2364  
  2365  	p.mu.Lock()
  2366  	defer p.mu.Unlock()
  2367  	p.mu.eps[nic.ID()] = e
  2368  	return e
  2369  }
  2370  
  2371  func (p *protocol) findEndpointWithAddress(addr tcpip.Address) *endpoint {
  2372  	p.mu.RLock()
  2373  	defer p.mu.RUnlock()
  2374  
  2375  	for _, e := range p.mu.eps {
  2376  		if addressEndpoint := e.AcquireAssignedAddress(addr, false /* allowTemp */, stack.NeverPrimaryEndpoint); addressEndpoint != nil {
  2377  			addressEndpoint.DecRef()
  2378  			return e
  2379  		}
  2380  	}
  2381  
  2382  	return nil
  2383  }
  2384  
  2385  func (p *protocol) getEndpointForNIC(id tcpip.NICID) (*endpoint, bool) {
  2386  	p.mu.RLock()
  2387  	defer p.mu.RUnlock()
  2388  	ep, ok := p.mu.eps[id]
  2389  	return ep, ok
  2390  }
  2391  
  2392  func (p *protocol) forgetEndpoint(nicID tcpip.NICID) {
  2393  	p.mu.Lock()
  2394  	defer p.mu.Unlock()
  2395  	delete(p.mu.eps, nicID)
  2396  }
  2397  
  2398  // SetOption implements stack.NetworkProtocol.
  2399  func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error {
  2400  	switch v := option.(type) {
  2401  	case *tcpip.DefaultTTLOption:
  2402  		p.SetDefaultTTL(uint8(*v))
  2403  		return nil
  2404  	default:
  2405  		return &tcpip.ErrUnknownProtocolOption{}
  2406  	}
  2407  }
  2408  
  2409  // Option implements stack.NetworkProtocol.
  2410  func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error {
  2411  	switch v := option.(type) {
  2412  	case *tcpip.DefaultTTLOption:
  2413  		*v = tcpip.DefaultTTLOption(p.DefaultTTL())
  2414  		return nil
  2415  	default:
  2416  		return &tcpip.ErrUnknownProtocolOption{}
  2417  	}
  2418  }
  2419  
  2420  // SetDefaultTTL sets the default TTL for endpoints created with this protocol.
  2421  func (p *protocol) SetDefaultTTL(ttl uint8) {
  2422  	p.defaultTTL.Store(uint32(ttl))
  2423  }
  2424  
  2425  // DefaultTTL returns the default TTL for endpoints created with this protocol.
  2426  func (p *protocol) DefaultTTL() uint8 {
  2427  	return uint8(p.defaultTTL.Load())
  2428  }
  2429  
  2430  // emitMulticastEvent emits a multicast forwarding event using the provided
  2431  // generator if a valid event dispatcher exists.
  2432  func (e *endpoint) emitMulticastEvent(eventGenerator func(stack.MulticastForwardingEventDispatcher)) {
  2433  	e.protocol.mu.RLock()
  2434  	defer e.protocol.mu.RUnlock()
  2435  	if mcastDisp := e.protocol.mu.multicastForwardingDisp; mcastDisp != nil {
  2436  		eventGenerator(mcastDisp)
  2437  	}
  2438  }
  2439  
// Close implements stack.NetworkProtocol.
//
// It releases the fragmentation state and then closes the multicast route
// table.
func (p *protocol) Close() {
	p.fragmentation.Release()
	p.multicastRouteTable.Close()
}
  2445  
  2446  func validateUnicastSourceAndMulticastDestination(addresses stack.UnicastSourceAndMulticastDestination) tcpip.Error {
  2447  	if !header.IsV6UnicastAddress(addresses.Source) || header.IsV6LinkLocalUnicastAddress(addresses.Source) {
  2448  		return &tcpip.ErrBadAddress{}
  2449  	}
  2450  
  2451  	if !header.IsV6MulticastAddress(addresses.Destination) || header.IsV6LinkLocalMulticastAddress(addresses.Destination) {
  2452  		return &tcpip.ErrBadAddress{}
  2453  	}
  2454  
  2455  	return nil
  2456  }
  2457  
  2458  func (p *protocol) multicastForwarding() bool {
  2459  	p.mu.RLock()
  2460  	defer p.mu.RUnlock()
  2461  	return p.mu.multicastForwardingDisp != nil
  2462  }
  2463  
  2464  func (p *protocol) newInstalledRoute(route stack.MulticastRoute) (*multicast.InstalledRoute, tcpip.Error) {
  2465  	if len(route.OutgoingInterfaces) == 0 {
  2466  		return nil, &tcpip.ErrMissingRequiredFields{}
  2467  	}
  2468  
  2469  	if !p.stack.HasNIC(route.ExpectedInputInterface) {
  2470  		return nil, &tcpip.ErrUnknownNICID{}
  2471  	}
  2472  
  2473  	for _, outgoingInterface := range route.OutgoingInterfaces {
  2474  		if route.ExpectedInputInterface == outgoingInterface.ID {
  2475  			return nil, &tcpip.ErrMulticastInputCannotBeOutput{}
  2476  		}
  2477  
  2478  		if !p.stack.HasNIC(outgoingInterface.ID) {
  2479  			return nil, &tcpip.ErrUnknownNICID{}
  2480  		}
  2481  	}
  2482  	return p.multicastRouteTable.NewInstalledRoute(route), nil
  2483  }
  2484  
  2485  // AddMulticastRoute implements stack.MulticastForwardingNetworkProtocol.
  2486  func (p *protocol) AddMulticastRoute(addresses stack.UnicastSourceAndMulticastDestination, route stack.MulticastRoute) tcpip.Error {
  2487  	if !p.multicastForwarding() {
  2488  		return &tcpip.ErrNotPermitted{}
  2489  	}
  2490  
  2491  	if err := validateUnicastSourceAndMulticastDestination(addresses); err != nil {
  2492  		return err
  2493  	}
  2494  
  2495  	installedRoute, err := p.newInstalledRoute(route)
  2496  	if err != nil {
  2497  		return err
  2498  	}
  2499  
  2500  	pendingPackets := p.multicastRouteTable.AddInstalledRoute(addresses, installedRoute)
  2501  
  2502  	for _, pkt := range pendingPackets {
  2503  		p.forwardPendingMulticastPacket(pkt, installedRoute)
  2504  	}
  2505  	return nil
  2506  }
  2507  
  2508  // RemoveMulticastRoute implements
  2509  // stack.MulticastForwardingNetworkProtocol.RemoveMulticastRoute.
  2510  func (p *protocol) RemoveMulticastRoute(addresses stack.UnicastSourceAndMulticastDestination) tcpip.Error {
  2511  	if err := validateUnicastSourceAndMulticastDestination(addresses); err != nil {
  2512  		return err
  2513  	}
  2514  
  2515  	if removed := p.multicastRouteTable.RemoveInstalledRoute(addresses); !removed {
  2516  		return &tcpip.ErrHostUnreachable{}
  2517  	}
  2518  
  2519  	return nil
  2520  }
  2521  
  2522  // MulticastRouteLastUsedTime implements
  2523  // stack.MulticastForwardingNetworkProtocol.
  2524  func (p *protocol) MulticastRouteLastUsedTime(addresses stack.UnicastSourceAndMulticastDestination) (tcpip.MonotonicTime, tcpip.Error) {
  2525  	if err := validateUnicastSourceAndMulticastDestination(addresses); err != nil {
  2526  		return tcpip.MonotonicTime{}, err
  2527  	}
  2528  
  2529  	timestamp, found := p.multicastRouteTable.GetLastUsedTimestamp(addresses)
  2530  
  2531  	if !found {
  2532  		return tcpip.MonotonicTime{}, &tcpip.ErrHostUnreachable{}
  2533  	}
  2534  
  2535  	return timestamp, nil
  2536  }
  2537  
  2538  // EnableMulticastForwarding implements
  2539  // stack.MulticastForwardingNetworkProtocol.EnableMulticastForwarding.
  2540  func (p *protocol) EnableMulticastForwarding(disp stack.MulticastForwardingEventDispatcher) (bool, tcpip.Error) {
  2541  	p.mu.Lock()
  2542  	defer p.mu.Unlock()
  2543  
  2544  	if p.mu.multicastForwardingDisp != nil {
  2545  		return true, nil
  2546  	}
  2547  
  2548  	if disp == nil {
  2549  		return false, &tcpip.ErrInvalidOptionValue{}
  2550  	}
  2551  
  2552  	p.mu.multicastForwardingDisp = disp
  2553  	return false, nil
  2554  }
  2555  
// DisableMulticastForwarding implements
// stack.MulticastForwardingNetworkProtocol.DisableMulticastForwarding.
//
// While holding p.mu, it clears the multicast forwarding event dispatcher and
// removes every installed route from the multicast route table.
func (p *protocol) DisableMulticastForwarding() {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.mu.multicastForwardingDisp = nil
	p.multicastRouteTable.RemoveAllInstalledRoutes()
}
  2564  
  2565  func (p *protocol) forwardPendingMulticastPacket(pkt stack.PacketBufferPtr, installedRoute *multicast.InstalledRoute) {
  2566  	defer pkt.DecRef()
  2567  
  2568  	// Attempt to forward the packet using the endpoint that it originally
  2569  	// arrived on. This ensures that the packet is only forwarded if it
  2570  	// matches the route's expected input interface (see 5a of RFC 1812 section
  2571  	// 5.2.1.3).
  2572  	ep, ok := p.getEndpointForNIC(pkt.NICID)
  2573  
  2574  	if !ok {
  2575  		// The endpoint that the packet arrived on no longer exists. Silently
  2576  		// drop the pkt.
  2577  		return
  2578  	}
  2579  
  2580  	if !ep.MulticastForwarding() {
  2581  		return
  2582  	}
  2583  
  2584  	ep.handleForwardingError(ep.forwardValidatedMulticastPacket(pkt, installedRoute))
  2585  }
  2586  
// Wait implements stack.NetworkProtocol.
//
// It is a no-op.
func (*protocol) Wait() {}
  2589  
  2590  // parseAndValidate parses the packet (including its transport layer header) and
  2591  // returns a view containing the parsed IP header. The caller is responsible
  2592  // for releasing the returned View.
  2593  //
  2594  // Returns true if the IP header was successfully parsed.
  2595  func (p *protocol) parseAndValidate(pkt stack.PacketBufferPtr) (*buffer.View, bool) {
  2596  	transProtoNum, hasTransportHdr, ok := p.Parse(pkt)
  2597  	if !ok {
  2598  		return nil, false
  2599  	}
  2600  
  2601  	h := header.IPv6(pkt.NetworkHeader().Slice())
  2602  	// Do not include the link header's size when calculating the size of the IP
  2603  	// packet.
  2604  	if !h.IsValid(pkt.Size() - len(pkt.LinkHeader().Slice())) {
  2605  		return nil, false
  2606  	}
  2607  
  2608  	if hasTransportHdr {
  2609  		p.parseTransport(pkt, transProtoNum)
  2610  	}
  2611  
  2612  	return pkt.NetworkHeader().View(), true
  2613  }
  2614  
  2615  func (p *protocol) parseTransport(pkt stack.PacketBufferPtr, transProtoNum tcpip.TransportProtocolNumber) {
  2616  	if transProtoNum == header.ICMPv6ProtocolNumber {
  2617  		// The transport layer will handle transport layer parsing errors.
  2618  		_ = parse.ICMPv6(pkt)
  2619  		return
  2620  	}
  2621  
  2622  	switch err := p.stack.ParsePacketBufferTransport(transProtoNum, pkt); err {
  2623  	case stack.ParsedOK:
  2624  	case stack.UnknownTransportProtocol, stack.TransportLayerParseError:
  2625  		// The transport layer will handle unknown protocols and transport layer
  2626  		// parsing errors.
  2627  	default:
  2628  		panic(fmt.Sprintf("unexpected error parsing transport header = %d", err))
  2629  	}
  2630  }
  2631  
  2632  // Parse implements stack.NetworkProtocol.
  2633  func (*protocol) Parse(pkt stack.PacketBufferPtr) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) {
  2634  	proto, _, fragOffset, fragMore, ok := parse.IPv6(pkt)
  2635  	if !ok {
  2636  		return 0, false, false
  2637  	}
  2638  
  2639  	return proto, !fragMore && fragOffset == 0, true
  2640  }
  2641  
  2642  // allowICMPReply reports whether an ICMP reply with provided type may
  2643  // be sent following the rate mask options and global ICMP rate limiter.
  2644  func (p *protocol) allowICMPReply(icmpType header.ICMPv6Type) bool {
  2645  	p.mu.RLock()
  2646  	defer p.mu.RUnlock()
  2647  
  2648  	if _, ok := p.mu.icmpRateLimitedTypes[icmpType]; ok {
  2649  		return p.stack.AllowICMPMessage()
  2650  	}
  2651  	return true
  2652  }
  2653  
// SendRejectionError implements stack.RejectIPv6WithHandler.
//
// rejectWith selects the ICMP error reason passed to returnError for pkt;
// inputHook is forwarded unchanged. It panics if rejectWith is not one of the
// values handled below.
func (p *protocol) SendRejectionError(pkt stack.PacketBufferPtr, rejectWith stack.RejectIPv6WithICMPType, inputHook bool) tcpip.Error {
	switch rejectWith {
	case stack.RejectIPv6WithICMPNoRoute:
		return p.returnError(&icmpReasonNetUnreachable{}, pkt, inputHook)
	case stack.RejectIPv6WithICMPAddrUnreachable:
		return p.returnError(&icmpReasonHostUnreachable{}, pkt, inputHook)
	case stack.RejectIPv6WithICMPPortUnreachable:
		return p.returnError(&icmpReasonPortUnreachable{}, pkt, inputHook)
	case stack.RejectIPv6WithICMPAdminProhibited:
		return p.returnError(&icmpReasonAdministrativelyProhibited{}, pkt, inputHook)
	default:
		// %[1]T / %[1]d print the type and numeric value of the same argument.
		panic(fmt.Sprintf("unhandled %[1]T = %[1]d", rejectWith))
	}
}
  2669  
  2670  // calculateNetworkMTU calculates the network-layer payload MTU based on the
  2671  // link-layer payload MTU and the length of every IPv6 header.
  2672  // Note that this is different than the Payload Length field of the IPv6 header,
  2673  // which includes the length of the extension headers.
  2674  func calculateNetworkMTU(linkMTU, networkHeadersLen uint32) (uint32, tcpip.Error) {
  2675  	if linkMTU < header.IPv6MinimumMTU {
  2676  		return 0, &tcpip.ErrInvalidEndpointState{}
  2677  	}
  2678  
  2679  	// As per RFC 7112 section 5, we should discard packets if their IPv6 header
  2680  	// is bigger than 1280 bytes (ie, the minimum link MTU) since we do not
  2681  	// support PMTU discovery:
  2682  	//   Hosts that do not discover the Path MTU MUST limit the IPv6 Header Chain
  2683  	//   length to 1280 bytes.  Limiting the IPv6 Header Chain length to 1280
  2684  	//   bytes ensures that the header chain length does not exceed the IPv6
  2685  	//   minimum MTU.
  2686  	if networkHeadersLen > header.IPv6MinimumMTU {
  2687  		return 0, &tcpip.ErrMalformedHeader{}
  2688  	}
  2689  
  2690  	networkMTU := linkMTU - networkHeadersLen
  2691  	if networkMTU > maxPayloadSize {
  2692  		networkMTU = maxPayloadSize
  2693  	}
  2694  	return networkMTU, nil
  2695  }
  2696  
// Options holds options to configure a new protocol. It is consumed by
// NewProtocolWithOptions; the zero value is what NewProtocol uses.
type Options struct {
	// NDPConfigs is the default NDP configurations used by interfaces.
	NDPConfigs NDPConfigurations

	// AutoGenLinkLocal determines whether or not the stack attempts to
	// auto-generate a link-local address for newly enabled non-loopback
	// NICs.
	//
	// Note, setting this to true does not mean that a link-local address is
	// assigned right away, or at all. If Duplicate Address Detection is enabled,
	// an address is only assigned if it successfully resolves. If it fails, no
	// further attempts are made to auto-generate a link-local address.
	//
	// The generated link-local address follows RFC 4291 Appendix A guidelines.
	AutoGenLinkLocal bool

	// NDPDisp is the NDP event dispatcher that an integrator can provide to
	// receive NDP related events.
	NDPDisp NDPDispatcher

	// OpaqueIIDOpts holds the options for generating opaque interface
	// identifiers (IIDs) as outlined by RFC 7217.
	OpaqueIIDOpts OpaqueInterfaceIdentifierOptions

	// TempIIDSeed is used to seed the initial temporary interface identifier
	// history value used to generate IIDs for temporary SLAAC addresses.
	//
	// Temporary SLAAC addresses are short-lived addresses which are unpredictable
	// and random from the perspective of other nodes on the network. It is
	// recommended that the seed be a random byte buffer of at least
	// header.IIDSize bytes to make sure that temporary SLAAC addresses are
	// sufficiently random. It should follow minimum randomness requirements for
	// security as outlined by RFC 4086.
	//
	// Note: using a nil value, the same seed across netstack program runs, or a
	// seed that is too small would reduce randomness and increase predictability,
	// defeating the purpose of temporary SLAAC addresses.
	TempIIDSeed []byte

	// MLD holds options for MLD.
	MLD MLDOptions

	// DADConfigs holds the default DAD configurations used by IPv6 endpoints.
	// It is passed to each new endpoint's DAD state in NewEndpoint.
	DADConfigs stack.DADConfigurations

	// AllowExternalLoopbackTraffic indicates that inbound loopback packets (i.e.
	// martian loopback packets) should be accepted.
	AllowExternalLoopbackTraffic bool
}
  2747  
  2748  // NewProtocolWithOptions returns an IPv6 network protocol.
  2749  func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory {
  2750  	opts.NDPConfigs.validate()
  2751  
  2752  	ids := hash.RandN32(buckets)
  2753  	hashIV := hash.RandN32(1)[0]
  2754  
  2755  	atomicIds := make([]atomicbitops.Uint32, len(ids))
  2756  	for i := range ids {
  2757  		atomicIds[i] = atomicbitops.FromUint32(ids[i])
  2758  	}
  2759  
  2760  	return func(s *stack.Stack) stack.NetworkProtocol {
  2761  		p := &protocol{
  2762  			stack:   s,
  2763  			options: opts,
  2764  
  2765  			ids:    atomicIds,
  2766  			hashIV: hashIV,
  2767  		}
  2768  		p.fragmentation = fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock(), p)
  2769  		p.mu.eps = make(map[tcpip.NICID]*endpoint)
  2770  		p.SetDefaultTTL(DefaultTTL)
  2771  		// Set default ICMP rate limiting to Linux defaults.
  2772  		//
  2773  		// Default: 0-1,3-127 (rate limit ICMPv6 errors except Packet Too Big)
  2774  		// See https://www.kernel.org/doc/Documentation/networking/ip-sysctl.txt.
  2775  		defaultIcmpTypes := make(map[header.ICMPv6Type]struct{})
  2776  		for i := header.ICMPv6Type(0); i < header.ICMPv6EchoRequest; i++ {
  2777  			switch i {
  2778  			case header.ICMPv6PacketTooBig:
  2779  				// Do not rate limit packet too big by default.
  2780  			default:
  2781  				defaultIcmpTypes[i] = struct{}{}
  2782  			}
  2783  		}
  2784  		p.mu.icmpRateLimitedTypes = defaultIcmpTypes
  2785  
  2786  		if err := p.multicastRouteTable.Init(multicast.DefaultConfig(s.Clock())); err != nil {
  2787  			panic(fmt.Sprintf("p.multicastRouteTable.Init(_): %s", err))
  2788  		}
  2789  
  2790  		return p
  2791  	}
  2792  }
  2793  
  2794  // NewProtocol is equivalent to NewProtocolWithOptions with an empty Options.
  2795  func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
  2796  	return NewProtocolWithOptions(Options{})(s)
  2797  }
  2798  
  2799  func calculateFragmentReserve(pkt stack.PacketBufferPtr) int {
  2800  	return pkt.AvailableHeaderBytes() + len(pkt.NetworkHeader().Slice()) + header.IPv6FragmentHeaderSize
  2801  }
  2802  
  2803  // hashRoute calculates a hash value for the given route. It uses the source &
  2804  // destination address and 32-bit number to generate the hash.
  2805  func hashRoute(r *stack.Route, hashIV uint32) uint32 {
  2806  	// The FNV-1a was chosen because it is a fast hashing algorithm, and
  2807  	// cryptographic properties are not needed here.
  2808  	h := fnv.New32a()
  2809  	localAddr := r.LocalAddress()
  2810  	if _, err := h.Write(localAddr.AsSlice()); err != nil {
  2811  		panic(fmt.Sprintf("Hash.Write: %s, but Hash' implementation of Write is not expected to ever return an error", err))
  2812  	}
  2813  	remoteAddr := r.RemoteAddress()
  2814  	if _, err := h.Write(remoteAddr.AsSlice()); err != nil {
  2815  		panic(fmt.Sprintf("Hash.Write: %s, but Hash' implementation of Write is not expected to ever return an error", err))
  2816  	}
  2817  
  2818  	s := make([]byte, 4)
  2819  	binary.LittleEndian.PutUint32(s, hashIV)
  2820  	if _, err := h.Write(s); err != nil {
  2821  		panic(fmt.Sprintf("Hash.Write: %s, but Hash' implementation of Write is not expected ever to return an error", err))
  2822  	}
  2823  
  2824  	return h.Sum32()
  2825  }
  2826  
  2827  func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeaders header.IPv6, transportProto tcpip.TransportProtocolNumber, id uint32) (stack.PacketBufferPtr, bool) {
  2828  	fragPkt, offset, copied, more := pf.BuildNextFragment()
  2829  	fragPkt.NetworkProtocolNumber = ProtocolNumber
  2830  
  2831  	originalIPHeadersLength := len(originalIPHeaders)
  2832  
  2833  	s := header.IPv6ExtHdrSerializer{&header.IPv6SerializableFragmentExtHdr{
  2834  		FragmentOffset: uint16(offset / header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit),
  2835  		M:              more,
  2836  		Identification: id,
  2837  	}}
  2838  
  2839  	fragmentIPHeadersLength := originalIPHeadersLength + s.Length()
  2840  	fragmentIPHeaders := header.IPv6(fragPkt.NetworkHeader().Push(fragmentIPHeadersLength))
  2841  
  2842  	// Copy the IPv6 header and any extension headers already populated.
  2843  	if copied := copy(fragmentIPHeaders, originalIPHeaders); copied != originalIPHeadersLength {
  2844  		panic(fmt.Sprintf("wrong number of bytes copied into fragmentIPHeaders: got %d, want %d", copied, originalIPHeadersLength))
  2845  	}
  2846  
  2847  	nextHeader, _ := s.Serialize(transportProto, fragmentIPHeaders[originalIPHeadersLength:])
  2848  
  2849  	fragmentIPHeaders.SetNextHeader(nextHeader)
  2850  	fragmentIPHeaders.SetPayloadLength(uint16(copied + fragmentIPHeadersLength - header.IPv6MinimumSize))
  2851  
  2852  	return fragPkt, more
  2853  }
  2854  
  2855  func checkV4Mapped(h header.IPv6, stats ip.MultiCounterIPStats) bool {
  2856  	// Disallow IPv4-mapped addresses per RFC 6890 section 2.2.3.
  2857  	ret := true
  2858  	if header.IsV4MappedAddress(h.SourceAddress()) {
  2859  		stats.InvalidSourceAddressesReceived.Increment()
  2860  		ret = false
  2861  	}
  2862  	if header.IsV4MappedAddress(h.DestinationAddress()) {
  2863  		stats.InvalidDestinationAddressesReceived.Increment()
  2864  		ret = false
  2865  	}
  2866  	return ret
  2867  }