github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/tcpip/stack/neighbor_entry.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package stack 16 17 import ( 18 "fmt" 19 "time" 20 21 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip" 22 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/header" 23 ) 24 25 const ( 26 // immediateDuration is a duration of zero for scheduling work that needs to 27 // be done immediately but asynchronously to avoid deadlock. 28 immediateDuration time.Duration = 0 29 ) 30 31 // NeighborEntry describes a neighboring device in the local network. 32 type NeighborEntry struct { 33 Addr tcpip.Address 34 LinkAddr tcpip.LinkAddress 35 State NeighborState 36 UpdatedAt tcpip.MonotonicTime 37 } 38 39 // NeighborState defines the state of a NeighborEntry within the Neighbor 40 // Unreachability Detection state machine, as per RFC 4861 section 7.3.2 and 41 // RFC 7048. 42 type NeighborState uint8 43 44 const ( 45 // Unknown means reachability has not been verified yet. This is the initial 46 // state of entries that have been created automatically by the Neighbor 47 // Unreachability Detection state machine. 48 Unknown NeighborState = iota 49 // Incomplete means that there is an outstanding request to resolve the 50 // address. 51 Incomplete 52 // Reachable means the path to the neighbor is functioning properly for both 53 // receive and transmit paths. 54 Reachable 55 // Stale means reachability to the neighbor is unknown, but packets are still 56 // able to be transmitted to the possibly stale link address. 57 Stale 58 // Delay means reachability to the neighbor is unknown and pending 59 // confirmation from an upper-level protocol like TCP, but packets are still 60 // able to be transmitted to the possibly stale link address. 61 Delay 62 // Probe means a reachability confirmation is actively being sought by 63 // periodically retransmitting reachability probes until a reachability 64 // confirmation is received, or until the maximum number of probes has been 65 // sent. 66 Probe 67 // Static describes entries that have been explicitly added by the user. They 68 // do not expire and are not deleted until explicitly removed. 69 Static 70 // Unreachable means reachability confirmation failed; the maximum number of 71 // reachability probes has been sent and no replies have been received. 72 // 73 // TODO(gvisor.dev/issue/5472): Add the following sentence when we implement 74 // RFC 7048: "Packets continue to be sent to the neighbor while 75 // re-attempting to resolve the address." 76 Unreachable 77 ) 78 79 type timer struct { 80 // done indicates to the timer that the timer was stopped. 81 done *bool 82 83 timer tcpip.Timer 84 } 85 86 // neighborEntry implements a neighbor entry's individual node behavior, as per 87 // RFC 4861 section 7.3.3. Neighbor Unreachability Detection operates in 88 // parallel with the sending of packets to a neighbor, necessitating the 89 // entry's lock to be acquired for all operations. 90 type neighborEntry struct { 91 neighborEntryEntry 92 93 cache *neighborCache 94 95 // nudState points to the Neighbor Unreachability Detection configuration. 96 nudState *NUDState 97 98 mu struct { 99 neighborEntryRWMutex 100 101 neigh NeighborEntry 102 103 // done is closed when address resolution is complete. It is nil iff s is 104 // incomplete and resolution is not yet in progress. 105 done chan struct{} 106 107 // onResolve is called with the result of address resolution. 108 onResolve []func(LinkResolutionResult) 109 110 isRouter bool 111 112 timer timer 113 } 114 } 115 116 // newNeighborEntry creates a neighbor cache entry starting at the default 117 // state, Unknown. Transition out of Unknown by calling either 118 // `handlePacketQueuedLocked` or `handleProbeLocked` on the newly created 119 // neighborEntry. 120 func newNeighborEntry(cache *neighborCache, remoteAddr tcpip.Address, nudState *NUDState) *neighborEntry { 121 n := &neighborEntry{ 122 cache: cache, 123 nudState: nudState, 124 } 125 n.mu.Lock() 126 n.mu.neigh = NeighborEntry{ 127 Addr: remoteAddr, 128 State: Unknown, 129 } 130 n.mu.Unlock() 131 return n 132 133 } 134 135 // newStaticNeighborEntry creates a neighbor cache entry starting at the 136 // Static state. The entry can only transition out of Static by directly 137 // calling `setStateLocked`. 138 func newStaticNeighborEntry(cache *neighborCache, addr tcpip.Address, linkAddr tcpip.LinkAddress, state *NUDState) *neighborEntry { 139 entry := NeighborEntry{ 140 Addr: addr, 141 LinkAddr: linkAddr, 142 State: Static, 143 UpdatedAt: cache.nic.stack.clock.NowMonotonic(), 144 } 145 n := &neighborEntry{ 146 cache: cache, 147 nudState: state, 148 } 149 n.mu.Lock() 150 n.mu.neigh = entry 151 n.mu.Unlock() 152 return n 153 } 154 155 // notifyCompletionLocked notifies those waiting for address resolution, with 156 // the link address if resolution completed successfully. 157 // 158 // Precondition: e.mu MUST be locked. 159 func (e *neighborEntry) notifyCompletionLocked(err tcpip.Error) { 160 res := LinkResolutionResult{LinkAddress: e.mu.neigh.LinkAddr, Err: err} 161 for _, callback := range e.mu.onResolve { 162 callback(res) 163 } 164 e.mu.onResolve = nil 165 if ch := e.mu.done; ch != nil { 166 close(ch) 167 e.mu.done = nil 168 // Dequeue the pending packets asynchronously to not hold up the current 169 // goroutine as writing packets may be a costly operation. 170 // 171 // At the time of writing, when writing packets, a neighbor's link address 172 // is resolved (which ends up obtaining the entry's lock) while holding the 173 // link resolution queue's lock. Dequeuing packets asynchronously avoids a 174 // lock ordering violation. 175 // 176 // NB: this is equivalent to spawning a goroutine directly using the go 177 // keyword but allows tests that use manual clocks to deterministically 178 // wait for this work to complete. 179 e.cache.nic.stack.clock.AfterFunc(0, func() { 180 e.cache.nic.linkResQueue.dequeue(ch, e.mu.neigh.LinkAddr, err) 181 }) 182 } 183 } 184 185 // dispatchAddEventLocked signals to stack's NUD Dispatcher that the entry has 186 // been added. 187 // 188 // Precondition: e.mu MUST be locked. 189 func (e *neighborEntry) dispatchAddEventLocked() { 190 if nudDisp := e.cache.nic.stack.nudDisp; nudDisp != nil { 191 nudDisp.OnNeighborAdded(e.cache.nic.id, e.mu.neigh) 192 } 193 } 194 195 // dispatchChangeEventLocked signals to stack's NUD Dispatcher that the entry 196 // has changed state or link-layer address. 197 // 198 // Precondition: e.mu MUST be locked. 199 func (e *neighborEntry) dispatchChangeEventLocked() { 200 if nudDisp := e.cache.nic.stack.nudDisp; nudDisp != nil { 201 nudDisp.OnNeighborChanged(e.cache.nic.id, e.mu.neigh) 202 } 203 } 204 205 // dispatchRemoveEventLocked signals to stack's NUD Dispatcher that the entry 206 // has been removed. 207 // 208 // Precondition: e.mu MUST be locked. 209 func (e *neighborEntry) dispatchRemoveEventLocked() { 210 if nudDisp := e.cache.nic.stack.nudDisp; nudDisp != nil { 211 nudDisp.OnNeighborRemoved(e.cache.nic.id, e.mu.neigh) 212 } 213 } 214 215 // cancelTimerLocked cancels the currently scheduled action, if there is one. 216 // Entries in Unknown, Stale, or Static state do not have a scheduled action. 217 // 218 // Precondition: e.mu MUST be locked. 219 func (e *neighborEntry) cancelTimerLocked() { 220 if e.mu.timer.timer != nil { 221 e.mu.timer.timer.Stop() 222 *e.mu.timer.done = true 223 224 e.mu.timer = timer{} 225 } 226 } 227 228 // removeLocked prepares the entry for removal. 229 // 230 // Precondition: e.mu MUST be locked. 231 func (e *neighborEntry) removeLocked() { 232 e.mu.neigh.UpdatedAt = e.cache.nic.stack.clock.NowMonotonic() 233 e.dispatchRemoveEventLocked() 234 // Set state to unknown to invalidate this entry if it's cached in a Route. 235 e.setStateLocked(Unknown) 236 e.cancelTimerLocked() 237 // TODO(https://gvisor.dev/issues/5583): test the case where this function is 238 // called during resolution; that can happen in at least these scenarios: 239 // 240 // - manual address removal during resolution 241 // 242 // - neighbor cache eviction during resolution 243 e.notifyCompletionLocked(&tcpip.ErrAborted{}) 244 } 245 246 // setStateLocked transitions the entry to the specified state immediately. 247 // 248 // Follows the logic defined in RFC 4861 section 7.3.3. 249 // 250 // Precondition: e.mu MUST be locked. 251 func (e *neighborEntry) setStateLocked(next NeighborState) { 252 e.cancelTimerLocked() 253 254 prev := e.mu.neigh.State 255 e.mu.neigh.State = next 256 e.mu.neigh.UpdatedAt = e.cache.nic.stack.clock.NowMonotonic() 257 config := e.nudState.Config() 258 259 switch next { 260 case Incomplete: 261 panic(fmt.Sprintf("should never transition to Incomplete with setStateLocked; neigh = %#v, prev state = %s", e.mu.neigh, prev)) 262 263 case Reachable: 264 // Protected by e.mu. 265 done := false 266 267 e.mu.timer = timer{ 268 done: &done, 269 timer: e.cache.nic.stack.Clock().AfterFunc(e.nudState.ReachableTime(), func() { 270 e.mu.Lock() 271 defer e.mu.Unlock() 272 273 if done { 274 // The timer was stopped because the entry changed state. 275 return 276 } 277 278 e.setStateLocked(Stale) 279 e.dispatchChangeEventLocked() 280 }), 281 } 282 283 case Delay: 284 // Protected by e.mu. 285 done := false 286 287 e.mu.timer = timer{ 288 done: &done, 289 timer: e.cache.nic.stack.Clock().AfterFunc(config.DelayFirstProbeTime, func() { 290 e.mu.Lock() 291 defer e.mu.Unlock() 292 293 if done { 294 // The timer was stopped because the entry changed state. 295 return 296 } 297 298 e.setStateLocked(Probe) 299 e.dispatchChangeEventLocked() 300 }), 301 } 302 303 case Probe: 304 // Protected by e.mu. 305 done := false 306 307 remaining := config.MaxUnicastProbes 308 addr := e.mu.neigh.Addr 309 linkAddr := e.mu.neigh.LinkAddr 310 311 // Send a probe in another gorountine to free this thread of execution 312 // for finishing the state transition. This is necessary to escape the 313 // currently held lock so we can send the probe message without holding 314 // a shared lock. 315 e.mu.timer = timer{ 316 done: &done, 317 timer: e.cache.nic.stack.Clock().AfterFunc(immediateDuration, func() { 318 var err tcpip.Error = &tcpip.ErrTimeout{} 319 if remaining != 0 { 320 err = e.cache.linkRes.LinkAddressRequest(addr, tcpip.Address{} /* localAddr */, linkAddr) 321 } 322 323 e.mu.Lock() 324 defer e.mu.Unlock() 325 326 if done { 327 // The timer was stopped because the entry changed state. 328 return 329 } 330 331 if err != nil { 332 e.setStateLocked(Unreachable) 333 e.notifyCompletionLocked(err) 334 e.dispatchChangeEventLocked() 335 return 336 } 337 338 remaining-- 339 e.mu.timer.timer.Reset(config.RetransmitTimer) 340 }), 341 } 342 343 case Unreachable: 344 345 case Unknown, Stale, Static: 346 // Do nothing 347 348 default: 349 panic(fmt.Sprintf("Invalid state transition from %q to %q", prev, next)) 350 } 351 } 352 353 // handlePacketQueuedLocked advances the state machine according to a packet 354 // being queued for outgoing transmission. 355 // 356 // Follows the logic defined in RFC 4861 section 7.3.3. 357 // 358 // Precondition: e.mu MUST be locked. 359 func (e *neighborEntry) handlePacketQueuedLocked(localAddr tcpip.Address) { 360 switch e.mu.neigh.State { 361 case Unknown, Unreachable: 362 prev := e.mu.neigh.State 363 e.mu.neigh.State = Incomplete 364 e.mu.neigh.UpdatedAt = e.cache.nic.stack.clock.NowMonotonic() 365 366 switch prev { 367 case Unknown: 368 e.dispatchAddEventLocked() 369 case Unreachable: 370 e.dispatchChangeEventLocked() 371 e.cache.nic.stats.neighbor.unreachableEntryLookups.Increment() 372 } 373 374 config := e.nudState.Config() 375 376 // Protected by e.mu. 377 done := false 378 379 remaining := config.MaxMulticastProbes 380 addr := e.mu.neigh.Addr 381 382 // Send a probe in another gorountine to free this thread of execution 383 // for finishing the state transition. This is necessary to escape the 384 // currently held lock so we can send the probe message without holding 385 // a shared lock. 386 e.mu.timer = timer{ 387 done: &done, 388 timer: e.cache.nic.stack.Clock().AfterFunc(immediateDuration, func() { 389 var err tcpip.Error = &tcpip.ErrTimeout{} 390 if remaining != 0 { 391 // As per RFC 4861 section 7.2.2: 392 // 393 // If the source address of the packet prompting the solicitation is 394 // the same as one of the addresses assigned to the outgoing interface, 395 // that address SHOULD be placed in the IP Source Address of the 396 // outgoing solicitation. 397 // 398 err = e.cache.linkRes.LinkAddressRequest(addr, localAddr, "" /* linkAddr */) 399 } 400 401 e.mu.Lock() 402 defer e.mu.Unlock() 403 404 if done { 405 // The timer was stopped because the entry changed state. 406 return 407 } 408 409 if err != nil { 410 e.setStateLocked(Unreachable) 411 e.notifyCompletionLocked(err) 412 e.dispatchChangeEventLocked() 413 return 414 } 415 416 remaining-- 417 e.mu.timer.timer.Reset(config.RetransmitTimer) 418 }), 419 } 420 421 case Stale: 422 e.setStateLocked(Delay) 423 e.dispatchChangeEventLocked() 424 425 case Incomplete, Reachable, Delay, Probe, Static: 426 // Do nothing 427 default: 428 panic(fmt.Sprintf("Invalid cache entry state: %s", e.mu.neigh.State)) 429 } 430 } 431 432 // handleProbeLocked processes an incoming neighbor probe (e.g. ARP request or 433 // Neighbor Solicitation for ARP or NDP, respectively). 434 // 435 // Follows the logic defined in RFC 4861 section 7.2.3. 436 // 437 // Precondition: e.mu MUST be locked. 438 func (e *neighborEntry) handleProbeLocked(remoteLinkAddr tcpip.LinkAddress) { 439 // Probes MUST be silently discarded if the target address is tentative, does 440 // not exist, or not bound to the NIC as per RFC 4861 section 7.2.3. These 441 // checks MUST be done by the NetworkEndpoint. 442 443 switch e.mu.neigh.State { 444 case Unknown: 445 e.mu.neigh.LinkAddr = remoteLinkAddr 446 e.setStateLocked(Stale) 447 e.dispatchAddEventLocked() 448 449 case Incomplete: 450 // "If an entry already exists, and the cached link-layer address 451 // differs from the one in the received Source Link-Layer option, the 452 // cached address should be replaced by the received address, and the 453 // entry's reachability state MUST be set to STALE." 454 // - RFC 4861 section 7.2.3 455 e.mu.neigh.LinkAddr = remoteLinkAddr 456 e.setStateLocked(Stale) 457 e.notifyCompletionLocked(nil) 458 e.dispatchChangeEventLocked() 459 460 case Reachable, Delay, Probe: 461 if e.mu.neigh.LinkAddr != remoteLinkAddr { 462 e.mu.neigh.LinkAddr = remoteLinkAddr 463 e.setStateLocked(Stale) 464 e.dispatchChangeEventLocked() 465 } 466 467 case Stale: 468 if e.mu.neigh.LinkAddr != remoteLinkAddr { 469 e.mu.neigh.LinkAddr = remoteLinkAddr 470 e.dispatchChangeEventLocked() 471 } 472 473 case Unreachable: 474 // TODO(gvisor.dev/issue/5472): Do not change the entry if the link 475 // address is the same, as per RFC 7048. 476 e.mu.neigh.LinkAddr = remoteLinkAddr 477 e.setStateLocked(Stale) 478 e.dispatchChangeEventLocked() 479 480 case Static: 481 // Do nothing 482 483 default: 484 panic(fmt.Sprintf("Invalid cache entry state: %s", e.mu.neigh.State)) 485 } 486 } 487 488 // handleConfirmationLocked processes an incoming neighbor confirmation 489 // (e.g. ARP reply or Neighbor Advertisement for ARP or NDP, respectively). 490 // 491 // Follows the state machine defined by RFC 4861 section 7.2.5. 492 // 493 // TODO(gvisor.dev/issue/2277): To protect against ARP poisoning and other 494 // attacks against NDP functions, Secure Neighbor Discovery (SEND) Protocol 495 // should be deployed where preventing access to the broadcast segment might 496 // not be possible. SEND uses RSA key pairs to produce Cryptographically 497 // Generated Addresses (CGA), as defined in RFC 3972. This ensures that the 498 // claimed source of an NDP message is the owner of the claimed address. 499 // 500 // Precondition: e.mu MUST be locked. 501 func (e *neighborEntry) handleConfirmationLocked(linkAddr tcpip.LinkAddress, flags ReachabilityConfirmationFlags) { 502 switch e.mu.neigh.State { 503 case Incomplete: 504 if len(linkAddr) == 0 { 505 // "If the link layer has addresses and no Target Link-Layer Address 506 // option is included, the receiving node SHOULD silently discard the 507 // received advertisement." - RFC 4861 section 7.2.5 508 e.cache.nic.stats.neighbor.droppedInvalidLinkAddressConfirmations.Increment() 509 break 510 } 511 512 e.mu.neigh.LinkAddr = linkAddr 513 if flags.Solicited { 514 e.setStateLocked(Reachable) 515 } else { 516 e.setStateLocked(Stale) 517 } 518 e.dispatchChangeEventLocked() 519 e.mu.isRouter = flags.IsRouter 520 e.notifyCompletionLocked(nil) 521 522 // "Note that the Override flag is ignored if the entry is in the 523 // INCOMPLETE state." - RFC 4861 section 7.2.5 524 525 case Reachable, Stale, Delay, Probe: 526 isLinkAddrDifferent := len(linkAddr) != 0 && e.mu.neigh.LinkAddr != linkAddr 527 528 if isLinkAddrDifferent { 529 if !flags.Override { 530 if e.mu.neigh.State == Reachable { 531 e.setStateLocked(Stale) 532 e.dispatchChangeEventLocked() 533 } 534 break 535 } 536 537 e.mu.neigh.LinkAddr = linkAddr 538 539 if !flags.Solicited { 540 if e.mu.neigh.State != Stale { 541 e.setStateLocked(Stale) 542 e.dispatchChangeEventLocked() 543 } else { 544 // Notify the LinkAddr change, even though NUD state hasn't changed. 545 e.dispatchChangeEventLocked() 546 } 547 break 548 } 549 } 550 551 if flags.Solicited && (flags.Override || !isLinkAddrDifferent) { 552 wasReachable := e.mu.neigh.State == Reachable 553 // Set state to Reachable again to refresh timers. 554 e.setStateLocked(Reachable) 555 e.notifyCompletionLocked(nil) 556 if !wasReachable { 557 e.dispatchChangeEventLocked() 558 } 559 } 560 561 if e.mu.isRouter && !flags.IsRouter && header.IsV6UnicastAddress(e.mu.neigh.Addr) { 562 // "In those cases where the IsRouter flag changes from TRUE to FALSE as 563 // a result of this update, the node MUST remove that router from the 564 // Default Router List and update the Destination Cache entries for all 565 // destinations using that neighbor as a router as specified in Section 566 // 7.3.3. This is needed to detect when a node that is used as a router 567 // stops forwarding packets due to being configured as a host." 568 // - RFC 4861 section 7.2.5 569 // 570 // TODO(gvisor.dev/issue/4085): Remove the special casing we do for IPv6 571 // here. 572 ep := e.cache.nic.getNetworkEndpoint(header.IPv6ProtocolNumber) 573 if ep == nil { 574 panic(fmt.Sprintf("have a neighbor entry for an IPv6 router but no IPv6 network endpoint")) 575 } 576 577 if ndpEP, ok := ep.(NDPEndpoint); ok { 578 ndpEP.InvalidateDefaultRouter(e.mu.neigh.Addr) 579 } 580 } 581 e.mu.isRouter = flags.IsRouter 582 583 case Unknown, Unreachable, Static: 584 // Do nothing 585 586 default: 587 panic(fmt.Sprintf("Invalid cache entry state: %s", e.mu.neigh.State)) 588 } 589 } 590 591 // handleUpperLevelConfirmation processes an incoming upper-level protocol 592 // (e.g. TCP acknowledgements) reachability confirmation. 593 func (e *neighborEntry) handleUpperLevelConfirmation() { 594 tryHandleConfirmation := func() bool { 595 switch e.mu.neigh.State { 596 case Stale, Delay, Probe: 597 return true 598 case Reachable: 599 // Avoid setStateLocked; Timer.Reset is cheaper. 600 // 601 // Note that setting the timer does not need to be protected by the 602 // entry's write lock since we do not modify the timer pointer, but the 603 // time the timer should fire. The timer should have internal locks to 604 // synchronize timer resets changes with the clock. 605 e.mu.timer.timer.Reset(e.nudState.ReachableTime()) 606 return false 607 case Unknown, Incomplete, Unreachable, Static: 608 // Do nothing 609 return false 610 default: 611 panic(fmt.Sprintf("Invalid cache entry state: %s", e.mu.neigh.State)) 612 } 613 } 614 615 e.mu.RLock() 616 needsTransition := tryHandleConfirmation() 617 e.mu.RUnlock() 618 if !needsTransition { 619 return 620 } 621 622 // We need to transition the neighbor to Reachable so take the write lock and 623 // perform the transition, but only if we still need the transition since the 624 // state could have changed since we dropped the read lock above. 625 e.mu.Lock() 626 defer e.mu.Unlock() 627 if needsTransition := tryHandleConfirmation(); needsTransition { 628 e.setStateLocked(Reachable) 629 e.dispatchChangeEventLocked() 630 } 631 } 632 633 // getRemoteLinkAddress returns the entry's link address and whether that link 634 // address is valid. 635 func (e *neighborEntry) getRemoteLinkAddress() (tcpip.LinkAddress, bool) { 636 e.mu.RLock() 637 defer e.mu.RUnlock() 638 switch e.mu.neigh.State { 639 case Reachable, Static, Delay, Probe: 640 return e.mu.neigh.LinkAddr, true 641 case Unknown, Incomplete, Unreachable, Stale: 642 return "", false 643 default: 644 panic(fmt.Sprintf("invalid state for neighbor entry %v: %v", e.mu.neigh, e.mu.neigh.State)) 645 } 646 }