github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/stack/neighbor_entry.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package stack
    16  
    17  import (
    18  	"fmt"
    19  	"sync"
    20  	"time"
    21  
    22  	"github.com/SagerNet/gvisor/pkg/tcpip"
    23  	"github.com/SagerNet/gvisor/pkg/tcpip/header"
    24  )
    25  
    26  const (
    27  	// immediateDuration is a duration of zero for scheduling work that needs to
    28  	// be done immediately but asynchronously to avoid deadlock.
    29  	immediateDuration time.Duration = 0
    30  )
    31  
// NeighborEntry describes a neighboring device in the local network.
//
// It is the value handed to the NUD dispatcher whenever an entry is added,
// changed or removed.
type NeighborEntry struct {
	// Addr is the neighbor's address; it is the address link address
	// resolution requests are sent for.
	Addr tcpip.Address
	// LinkAddr is the link address currently associated with Addr. It is empty
	// until a probe or confirmation supplies one (or the entry is static).
	LinkAddr tcpip.LinkAddress
	// State is the entry's current Neighbor Unreachability Detection state.
	State NeighborState
	// UpdatedAt is the stack clock's time at the most recent state transition
	// or removal of the entry.
	UpdatedAt time.Time
}
    39  
// NeighborState defines the state of a NeighborEntry within the Neighbor
// Unreachability Detection state machine, as per RFC 4861 section 7.3.2 and
// RFC 7048.
type NeighborState uint8

// The states below are numbered sequentially from zero, so Unknown is the zero
// value of NeighborState.
const (
	// Unknown means reachability has not been verified yet. This is the initial
	// state of entries that have been created automatically by the Neighbor
	// Unreachability Detection state machine.
	Unknown NeighborState = iota
	// Incomplete means that there is an outstanding request to resolve the
	// address.
	Incomplete
	// Reachable means the path to the neighbor is functioning properly for both
	// receive and transmit paths.
	Reachable
	// Stale means reachability to the neighbor is unknown, but packets are still
	// able to be transmitted to the possibly stale link address.
	Stale
	// Delay means reachability to the neighbor is unknown and pending
	// confirmation from an upper-level protocol like TCP, but packets are still
	// able to be transmitted to the possibly stale link address.
	Delay
	// Probe means a reachability confirmation is actively being sought by
	// periodically retransmitting reachability probes until a reachability
	// confirmation is received, or until the maximum number of probes has been
	// sent.
	Probe
	// Static describes entries that have been explicitly added by the user. They
	// do not expire and are not deleted until explicitly removed.
	Static
	// Unreachable means reachability confirmation failed; the maximum number of
	// reachability probes has been sent and no replies have been received. No
	// timer is scheduled in this state; the entry leaves it when a packet is
	// queued for the neighbor or a probe is received from it.
	//
	// TODO(github.com/SagerNet/issue/5472): Add the following sentence when we implement
	// RFC 7048: "Packets continue to be sent to the neighbor while
	// re-attempting to resolve the address."
	Unreachable
)
    79  
// timer pairs a scheduled tcpip.Timer with a cancellation flag that the
// timer's callback consults.
//
// Stopping the underlying timer is not enough on its own because the callback
// may already be running and waiting for the entry's lock; the flag lets such
// a callback notice it was cancelled once it acquires the lock.
type timer struct {
	// done indicates to the timer that the timer was stopped. It points at a
	// variable captured by the timer's callback and is set to true by
	// cancelTimerLocked.
	done *bool

	// timer is the scheduled job; nil when nothing is scheduled.
	timer tcpip.Timer
}
    86  
// neighborEntry implements a neighbor entry's individual node behavior, as per
// RFC 4861 section 7.3.3. Neighbor Unreachability Detection operates in
// parallel with the sending of packets to a neighbor, necessitating the
// entry's lock to be acquired for all operations.
type neighborEntry struct {
	// NOTE(review): neighborEntryEntry is declared outside this file;
	// presumably it provides the links that let the cache keep entries in an
	// intrusive list - confirm.
	neighborEntryEntry

	// cache is the neighbor cache that owns this entry. The entry reaches the
	// NIC, the stack (clock, NUD dispatcher) and the link address resolver
	// through it.
	cache *neighborCache

	// nudState points to the Neighbor Unreachability Detection configuration.
	nudState *NUDState

	// mu protects every field declared inside it.
	mu struct {
		sync.RWMutex

		// neigh is the externally visible snapshot of this entry (address, link
		// address, state and last update time).
		neigh NeighborEntry

		// done is closed when address resolution is complete, after which the
		// field is reset to nil. It is nil iff s is incomplete and resolution is
		// not yet in progress.
		//
		// NOTE(review): "s" is not defined anywhere in this file; it presumably
		// refers to the entry's state. The channel is created outside this file.
		done chan struct{}

		// onResolve is called with the result of address resolution. The list is
		// cleared once the callbacks have been invoked.
		onResolve []func(LinkResolutionResult)

		// isRouter records the IsRouter flag carried by the most recently
		// processed neighbor confirmation.
		isRouter bool

		// timer is the action scheduled for the current state, if any.
		timer timer
	}
}
   116  
   117  // newNeighborEntry creates a neighbor cache entry starting at the default
   118  // state, Unknown. Transition out of Unknown by calling either
   119  // `handlePacketQueuedLocked` or `handleProbeLocked` on the newly created
   120  // neighborEntry.
   121  func newNeighborEntry(cache *neighborCache, remoteAddr tcpip.Address, nudState *NUDState) *neighborEntry {
   122  	n := &neighborEntry{
   123  		cache:    cache,
   124  		nudState: nudState,
   125  	}
   126  	n.mu.Lock()
   127  	n.mu.neigh = NeighborEntry{
   128  		Addr:  remoteAddr,
   129  		State: Unknown,
   130  	}
   131  	n.mu.Unlock()
   132  	return n
   133  
   134  }
   135  
   136  // newStaticNeighborEntry creates a neighbor cache entry starting at the
   137  // Static state. The entry can only transition out of Static by directly
   138  // calling `setStateLocked`.
   139  func newStaticNeighborEntry(cache *neighborCache, addr tcpip.Address, linkAddr tcpip.LinkAddress, state *NUDState) *neighborEntry {
   140  	entry := NeighborEntry{
   141  		Addr:      addr,
   142  		LinkAddr:  linkAddr,
   143  		State:     Static,
   144  		UpdatedAt: cache.nic.stack.clock.Now(),
   145  	}
   146  	n := &neighborEntry{
   147  		cache:    cache,
   148  		nudState: state,
   149  	}
   150  	n.mu.Lock()
   151  	n.mu.neigh = entry
   152  	n.mu.Unlock()
   153  	return n
   154  }
   155  
   156  // notifyCompletionLocked notifies those waiting for address resolution, with
   157  // the link address if resolution completed successfully.
   158  //
   159  // Precondition: e.mu MUST be locked.
   160  func (e *neighborEntry) notifyCompletionLocked(err tcpip.Error) {
   161  	res := LinkResolutionResult{LinkAddress: e.mu.neigh.LinkAddr, Err: err}
   162  	for _, callback := range e.mu.onResolve {
   163  		callback(res)
   164  	}
   165  	e.mu.onResolve = nil
   166  	if ch := e.mu.done; ch != nil {
   167  		close(ch)
   168  		e.mu.done = nil
   169  		// Dequeue the pending packets asynchronously to not hold up the current
   170  		// goroutine as writing packets may be a costly operation.
   171  		//
   172  		// At the time of writing, when writing packets, a neighbor's link address
   173  		// is resolved (which ends up obtaining the entry's lock) while holding the
   174  		// link resolution queue's lock. Dequeuing packets asynchronously avoids a
   175  		// lock ordering violation.
   176  		//
   177  		// NB: this is equivalent to spawning a goroutine directly using the go
   178  		// keyword but allows tests that use manual clocks to deterministically
   179  		// wait for this work to complete.
   180  		e.cache.nic.stack.clock.AfterFunc(0, func() {
   181  			e.cache.nic.linkResQueue.dequeue(ch, e.mu.neigh.LinkAddr, err)
   182  		})
   183  	}
   184  }
   185  
   186  // dispatchAddEventLocked signals to stack's NUD Dispatcher that the entry has
   187  // been added.
   188  //
   189  // Precondition: e.mu MUST be locked.
   190  func (e *neighborEntry) dispatchAddEventLocked() {
   191  	if nudDisp := e.cache.nic.stack.nudDisp; nudDisp != nil {
   192  		nudDisp.OnNeighborAdded(e.cache.nic.id, e.mu.neigh)
   193  	}
   194  }
   195  
   196  // dispatchChangeEventLocked signals to stack's NUD Dispatcher that the entry
   197  // has changed state or link-layer address.
   198  //
   199  // Precondition: e.mu MUST be locked.
   200  func (e *neighborEntry) dispatchChangeEventLocked() {
   201  	if nudDisp := e.cache.nic.stack.nudDisp; nudDisp != nil {
   202  		nudDisp.OnNeighborChanged(e.cache.nic.id, e.mu.neigh)
   203  	}
   204  }
   205  
   206  // dispatchRemoveEventLocked signals to stack's NUD Dispatcher that the entry
   207  // has been removed.
   208  //
   209  // Precondition: e.mu MUST be locked.
   210  func (e *neighborEntry) dispatchRemoveEventLocked() {
   211  	if nudDisp := e.cache.nic.stack.nudDisp; nudDisp != nil {
   212  		nudDisp.OnNeighborRemoved(e.cache.nic.id, e.mu.neigh)
   213  	}
   214  }
   215  
   216  // cancelTimerLocked cancels the currently scheduled action, if there is one.
   217  // Entries in Unknown, Stale, or Static state do not have a scheduled action.
   218  //
   219  // Precondition: e.mu MUST be locked.
   220  func (e *neighborEntry) cancelTimerLocked() {
   221  	if e.mu.timer.timer != nil {
   222  		e.mu.timer.timer.Stop()
   223  		*e.mu.timer.done = true
   224  
   225  		e.mu.timer = timer{}
   226  	}
   227  }
   228  
// removeLocked prepares the entry for removal.
//
// It stamps the entry with the current time, reports the removal to the NUD
// dispatcher, cancels any scheduled timer and finally completes any pending
// address resolution with tcpip.ErrAborted so that nobody keeps waiting on an
// entry that is going away.
//
// Precondition: e.mu MUST be locked.
func (e *neighborEntry) removeLocked() {
	// Update the timestamp first so that the remove event carries it.
	e.mu.neigh.UpdatedAt = e.cache.nic.stack.clock.Now()
	e.dispatchRemoveEventLocked()
	e.cancelTimerLocked()
	// TODO(https://github.com/SagerNet/issues/5583): test the case where this function is
	// called during resolution; that can happen in at least these scenarios:
	//
	// - manual address removal during resolution
	//
	// - neighbor cache eviction during resolution
	e.notifyCompletionLocked(&tcpip.ErrAborted{})
}
   244  
   245  // setStateLocked transitions the entry to the specified state immediately.
   246  //
   247  // Follows the logic defined in RFC 4861 section 7.3.3.
   248  //
   249  // Precondition: e.mu MUST be locked.
   250  func (e *neighborEntry) setStateLocked(next NeighborState) {
   251  	e.cancelTimerLocked()
   252  
   253  	prev := e.mu.neigh.State
   254  	e.mu.neigh.State = next
   255  	e.mu.neigh.UpdatedAt = e.cache.nic.stack.clock.Now()
   256  	config := e.nudState.Config()
   257  
   258  	switch next {
   259  	case Incomplete:
   260  		panic(fmt.Sprintf("should never transition to Incomplete with setStateLocked; neigh = %#v, prev state = %s", e.mu.neigh, prev))
   261  
   262  	case Reachable:
   263  		// Protected by e.mu.
   264  		done := false
   265  
   266  		e.mu.timer = timer{
   267  			done: &done,
   268  			timer: e.cache.nic.stack.Clock().AfterFunc(e.nudState.ReachableTime(), func() {
   269  				e.mu.Lock()
   270  				defer e.mu.Unlock()
   271  
   272  				if done {
   273  					// The timer was stopped because the entry changed state.
   274  					return
   275  				}
   276  
   277  				e.setStateLocked(Stale)
   278  				e.dispatchChangeEventLocked()
   279  			}),
   280  		}
   281  
   282  	case Delay:
   283  		// Protected by e.mu.
   284  		done := false
   285  
   286  		e.mu.timer = timer{
   287  			done: &done,
   288  			timer: e.cache.nic.stack.Clock().AfterFunc(config.DelayFirstProbeTime, func() {
   289  				e.mu.Lock()
   290  				defer e.mu.Unlock()
   291  
   292  				if done {
   293  					// The timer was stopped because the entry changed state.
   294  					return
   295  				}
   296  
   297  				e.setStateLocked(Probe)
   298  				e.dispatchChangeEventLocked()
   299  			}),
   300  		}
   301  
   302  	case Probe:
   303  		// Protected by e.mu.
   304  		done := false
   305  
   306  		remaining := config.MaxUnicastProbes
   307  		addr := e.mu.neigh.Addr
   308  		linkAddr := e.mu.neigh.LinkAddr
   309  
   310  		// Send a probe in another gorountine to free this thread of execution
   311  		// for finishing the state transition. This is necessary to escape the
   312  		// currently held lock so we can send the probe message without holding
   313  		// a shared lock.
   314  		e.mu.timer = timer{
   315  			done: &done,
   316  			timer: e.cache.nic.stack.Clock().AfterFunc(immediateDuration, func() {
   317  				var err tcpip.Error = &tcpip.ErrTimeout{}
   318  				if remaining != 0 {
   319  					err = e.cache.linkRes.LinkAddressRequest(addr, "" /* localAddr */, linkAddr)
   320  				}
   321  
   322  				e.mu.Lock()
   323  				defer e.mu.Unlock()
   324  
   325  				if done {
   326  					// The timer was stopped because the entry changed state.
   327  					return
   328  				}
   329  
   330  				if err != nil {
   331  					e.setStateLocked(Unreachable)
   332  					e.notifyCompletionLocked(err)
   333  					e.dispatchChangeEventLocked()
   334  					return
   335  				}
   336  
   337  				remaining--
   338  				e.mu.timer.timer.Reset(config.RetransmitTimer)
   339  			}),
   340  		}
   341  
   342  	case Unreachable:
   343  
   344  	case Unknown, Stale, Static:
   345  		// Do nothing
   346  
   347  	default:
   348  		panic(fmt.Sprintf("Invalid state transition from %q to %q", prev, next))
   349  	}
   350  }
   351  
   352  // handlePacketQueuedLocked advances the state machine according to a packet
   353  // being queued for outgoing transmission.
   354  //
   355  // Follows the logic defined in RFC 4861 section 7.3.3.
   356  //
   357  // Precondition: e.mu MUST be locked.
   358  func (e *neighborEntry) handlePacketQueuedLocked(localAddr tcpip.Address) {
   359  	switch e.mu.neigh.State {
   360  	case Unknown, Unreachable:
   361  		prev := e.mu.neigh.State
   362  		e.mu.neigh.State = Incomplete
   363  		e.mu.neigh.UpdatedAt = e.cache.nic.stack.clock.Now()
   364  
   365  		switch prev {
   366  		case Unknown:
   367  			e.dispatchAddEventLocked()
   368  		case Unreachable:
   369  			e.dispatchChangeEventLocked()
   370  			e.cache.nic.stats.neighbor.unreachableEntryLookups.Increment()
   371  		}
   372  
   373  		config := e.nudState.Config()
   374  
   375  		// Protected by e.mu.
   376  		done := false
   377  
   378  		remaining := config.MaxMulticastProbes
   379  		addr := e.mu.neigh.Addr
   380  
   381  		// Send a probe in another gorountine to free this thread of execution
   382  		// for finishing the state transition. This is necessary to escape the
   383  		// currently held lock so we can send the probe message without holding
   384  		// a shared lock.
   385  		e.mu.timer = timer{
   386  			done: &done,
   387  			timer: e.cache.nic.stack.Clock().AfterFunc(immediateDuration, func() {
   388  				var err tcpip.Error = &tcpip.ErrTimeout{}
   389  				if remaining != 0 {
   390  					// As per RFC 4861 section 7.2.2:
   391  					//
   392  					//  If the source address of the packet prompting the solicitation is
   393  					//  the same as one of the addresses assigned to the outgoing interface,
   394  					//  that address SHOULD be placed in the IP Source Address of the
   395  					//  outgoing solicitation.
   396  					//
   397  					err = e.cache.linkRes.LinkAddressRequest(addr, localAddr, "" /* linkAddr */)
   398  				}
   399  
   400  				e.mu.Lock()
   401  				defer e.mu.Unlock()
   402  
   403  				if done {
   404  					// The timer was stopped because the entry changed state.
   405  					return
   406  				}
   407  
   408  				if err != nil {
   409  					e.setStateLocked(Unreachable)
   410  					e.notifyCompletionLocked(err)
   411  					e.dispatchChangeEventLocked()
   412  					return
   413  				}
   414  
   415  				remaining--
   416  				e.mu.timer.timer.Reset(config.RetransmitTimer)
   417  			}),
   418  		}
   419  
   420  	case Stale:
   421  		e.setStateLocked(Delay)
   422  		e.dispatchChangeEventLocked()
   423  
   424  	case Incomplete, Reachable, Delay, Probe, Static:
   425  		// Do nothing
   426  	default:
   427  		panic(fmt.Sprintf("Invalid cache entry state: %s", e.mu.neigh.State))
   428  	}
   429  }
   430  
   431  // handleProbeLocked processes an incoming neighbor probe (e.g. ARP request or
   432  // Neighbor Solicitation for ARP or NDP, respectively).
   433  //
   434  // Follows the logic defined in RFC 4861 section 7.2.3.
   435  //
   436  // Precondition: e.mu MUST be locked.
   437  func (e *neighborEntry) handleProbeLocked(remoteLinkAddr tcpip.LinkAddress) {
   438  	// Probes MUST be silently discarded if the target address is tentative, does
   439  	// not exist, or not bound to the NIC as per RFC 4861 section 7.2.3. These
   440  	// checks MUST be done by the NetworkEndpoint.
   441  
   442  	switch e.mu.neigh.State {
   443  	case Unknown:
   444  		e.mu.neigh.LinkAddr = remoteLinkAddr
   445  		e.setStateLocked(Stale)
   446  		e.dispatchAddEventLocked()
   447  
   448  	case Incomplete:
   449  		// "If an entry already exists, and the cached link-layer address
   450  		// differs from the one in the received Source Link-Layer option, the
   451  		// cached address should be replaced by the received address, and the
   452  		// entry's reachability state MUST be set to STALE."
   453  		//  - RFC 4861 section 7.2.3
   454  		e.mu.neigh.LinkAddr = remoteLinkAddr
   455  		e.setStateLocked(Stale)
   456  		e.notifyCompletionLocked(nil)
   457  		e.dispatchChangeEventLocked()
   458  
   459  	case Reachable, Delay, Probe:
   460  		if e.mu.neigh.LinkAddr != remoteLinkAddr {
   461  			e.mu.neigh.LinkAddr = remoteLinkAddr
   462  			e.setStateLocked(Stale)
   463  			e.dispatchChangeEventLocked()
   464  		}
   465  
   466  	case Stale:
   467  		if e.mu.neigh.LinkAddr != remoteLinkAddr {
   468  			e.mu.neigh.LinkAddr = remoteLinkAddr
   469  			e.dispatchChangeEventLocked()
   470  		}
   471  
   472  	case Unreachable:
   473  		// TODO(github.com/SagerNet/issue/5472): Do not change the entry if the link
   474  		// address is the same, as per RFC 7048.
   475  		e.mu.neigh.LinkAddr = remoteLinkAddr
   476  		e.setStateLocked(Stale)
   477  		e.dispatchChangeEventLocked()
   478  
   479  	case Static:
   480  		// Do nothing
   481  
   482  	default:
   483  		panic(fmt.Sprintf("Invalid cache entry state: %s", e.mu.neigh.State))
   484  	}
   485  }
   486  
   487  // handleConfirmationLocked processes an incoming neighbor confirmation
   488  // (e.g. ARP reply or Neighbor Advertisement for ARP or NDP, respectively).
   489  //
   490  // Follows the state machine defined by RFC 4861 section 7.2.5.
   491  //
   492  // TODO(github.com/SagerNet/issue/2277): To protect against ARP poisoning and other
   493  // attacks against NDP functions, Secure Neighbor Discovery (SEND) Protocol
   494  // should be deployed where preventing access to the broadcast segment might
   495  // not be possible. SEND uses RSA key pairs to produce Cryptographically
   496  // Generated Addresses (CGA), as defined in RFC 3972. This ensures that the
   497  // claimed source of an NDP message is the owner of the claimed address.
   498  //
   499  // Precondition: e.mu MUST be locked.
   500  func (e *neighborEntry) handleConfirmationLocked(linkAddr tcpip.LinkAddress, flags ReachabilityConfirmationFlags) {
   501  	switch e.mu.neigh.State {
   502  	case Incomplete:
   503  		if len(linkAddr) == 0 {
   504  			// "If the link layer has addresses and no Target Link-Layer Address
   505  			// option is included, the receiving node SHOULD silently discard the
   506  			// received advertisement." - RFC 4861 section 7.2.5
   507  			break
   508  		}
   509  
   510  		e.mu.neigh.LinkAddr = linkAddr
   511  		if flags.Solicited {
   512  			e.setStateLocked(Reachable)
   513  		} else {
   514  			e.setStateLocked(Stale)
   515  		}
   516  		e.dispatchChangeEventLocked()
   517  		e.mu.isRouter = flags.IsRouter
   518  		e.notifyCompletionLocked(nil)
   519  
   520  		// "Note that the Override flag is ignored if the entry is in the
   521  		// INCOMPLETE state." - RFC 4861 section 7.2.5
   522  
   523  	case Reachable, Stale, Delay, Probe:
   524  		isLinkAddrDifferent := len(linkAddr) != 0 && e.mu.neigh.LinkAddr != linkAddr
   525  
   526  		if isLinkAddrDifferent {
   527  			if !flags.Override {
   528  				if e.mu.neigh.State == Reachable {
   529  					e.setStateLocked(Stale)
   530  					e.dispatchChangeEventLocked()
   531  				}
   532  				break
   533  			}
   534  
   535  			e.mu.neigh.LinkAddr = linkAddr
   536  
   537  			if !flags.Solicited {
   538  				if e.mu.neigh.State != Stale {
   539  					e.setStateLocked(Stale)
   540  					e.dispatchChangeEventLocked()
   541  				} else {
   542  					// Notify the LinkAddr change, even though NUD state hasn't changed.
   543  					e.dispatchChangeEventLocked()
   544  				}
   545  				break
   546  			}
   547  		}
   548  
   549  		if flags.Solicited && (flags.Override || !isLinkAddrDifferent) {
   550  			wasReachable := e.mu.neigh.State == Reachable
   551  			// Set state to Reachable again to refresh timers.
   552  			e.setStateLocked(Reachable)
   553  			e.notifyCompletionLocked(nil)
   554  			if !wasReachable {
   555  				e.dispatchChangeEventLocked()
   556  			}
   557  		}
   558  
   559  		if e.mu.isRouter && !flags.IsRouter && header.IsV6UnicastAddress(e.mu.neigh.Addr) {
   560  			// "In those cases where the IsRouter flag changes from TRUE to FALSE as
   561  			// a result of this update, the node MUST remove that router from the
   562  			// Default Router List and update the Destination Cache entries for all
   563  			// destinations using that neighbor as a router as specified in Section
   564  			// 7.3.3.  This is needed to detect when a node that is used as a router
   565  			// stops forwarding packets due to being configured as a host."
   566  			//  - RFC 4861 section 7.2.5
   567  			//
   568  			// TODO(github.com/SagerNet/issue/4085): Remove the special casing we do for IPv6
   569  			// here.
   570  			ep, ok := e.cache.nic.networkEndpoints[header.IPv6ProtocolNumber]
   571  			if !ok {
   572  				panic(fmt.Sprintf("have a neighbor entry for an IPv6 router but no IPv6 network endpoint"))
   573  			}
   574  
   575  			if ndpEP, ok := ep.(NDPEndpoint); ok {
   576  				ndpEP.InvalidateDefaultRouter(e.mu.neigh.Addr)
   577  			}
   578  		}
   579  		e.mu.isRouter = flags.IsRouter
   580  
   581  	case Unknown, Unreachable, Static:
   582  		// Do nothing
   583  
   584  	default:
   585  		panic(fmt.Sprintf("Invalid cache entry state: %s", e.mu.neigh.State))
   586  	}
   587  }
   588  
   589  // handleUpperLevelConfirmationLocked processes an incoming upper-level protocol
   590  // (e.g. TCP acknowledgements) reachability confirmation.
   591  //
   592  // Precondition: e.mu MUST be locked.
   593  func (e *neighborEntry) handleUpperLevelConfirmationLocked() {
   594  	switch e.mu.neigh.State {
   595  	case Reachable, Stale, Delay, Probe:
   596  		wasReachable := e.mu.neigh.State == Reachable
   597  		// Set state to Reachable again to refresh timers.
   598  		e.setStateLocked(Reachable)
   599  		if !wasReachable {
   600  			e.dispatchChangeEventLocked()
   601  		}
   602  
   603  	case Unknown, Incomplete, Unreachable, Static:
   604  		// Do nothing
   605  
   606  	default:
   607  		panic(fmt.Sprintf("Invalid cache entry state: %s", e.mu.neigh.State))
   608  	}
   609  }