github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/stack/neighbor_entry.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package stack 16 17 import ( 18 "fmt" 19 "sync" 20 "time" 21 22 "github.com/SagerNet/gvisor/pkg/tcpip" 23 "github.com/SagerNet/gvisor/pkg/tcpip/header" 24 ) 25 26 const ( 27 // immediateDuration is a duration of zero for scheduling work that needs to 28 // be done immediately but asynchronously to avoid deadlock. 29 immediateDuration time.Duration = 0 30 ) 31 32 // NeighborEntry describes a neighboring device in the local network. 33 type NeighborEntry struct { 34 Addr tcpip.Address 35 LinkAddr tcpip.LinkAddress 36 State NeighborState 37 UpdatedAt time.Time 38 } 39 40 // NeighborState defines the state of a NeighborEntry within the Neighbor 41 // Unreachability Detection state machine, as per RFC 4861 section 7.3.2 and 42 // RFC 7048. 43 type NeighborState uint8 44 45 const ( 46 // Unknown means reachability has not been verified yet. This is the initial 47 // state of entries that have been created automatically by the Neighbor 48 // Unreachability Detection state machine. 49 Unknown NeighborState = iota 50 // Incomplete means that there is an outstanding request to resolve the 51 // address. 52 Incomplete 53 // Reachable means the path to the neighbor is functioning properly for both 54 // receive and transmit paths. 55 Reachable 56 // Stale means reachability to the neighbor is unknown, but packets are still 57 // able to be transmitted to the possibly stale link address. 58 Stale 59 // Delay means reachability to the neighbor is unknown and pending 60 // confirmation from an upper-level protocol like TCP, but packets are still 61 // able to be transmitted to the possibly stale link address. 62 Delay 63 // Probe means a reachability confirmation is actively being sought by 64 // periodically retransmitting reachability probes until a reachability 65 // confirmation is received, or until the maximum number of probes has been 66 // sent. 67 Probe 68 // Static describes entries that have been explicitly added by the user. They 69 // do not expire and are not deleted until explicitly removed. 70 Static 71 // Unreachable means reachability confirmation failed; the maximum number of 72 // reachability probes has been sent and no replies have been received. 73 // 74 // TODO(github.com/SagerNet/issue/5472): Add the following sentence when we implement 75 // RFC 7048: "Packets continue to be sent to the neighbor while 76 // re-attempting to resolve the address." 77 Unreachable 78 ) 79 80 type timer struct { 81 // done indicates to the timer that the timer was stopped. 82 done *bool 83 84 timer tcpip.Timer 85 } 86 87 // neighborEntry implements a neighbor entry's individual node behavior, as per 88 // RFC 4861 section 7.3.3. Neighbor Unreachability Detection operates in 89 // parallel with the sending of packets to a neighbor, necessitating the 90 // entry's lock to be acquired for all operations. 91 type neighborEntry struct { 92 neighborEntryEntry 93 94 cache *neighborCache 95 96 // nudState points to the Neighbor Unreachability Detection configuration. 97 nudState *NUDState 98 99 mu struct { 100 sync.RWMutex 101 102 neigh NeighborEntry 103 104 // done is closed when address resolution is complete. It is nil iff s is 105 // incomplete and resolution is not yet in progress. 106 done chan struct{} 107 108 // onResolve is called with the result of address resolution. 109 onResolve []func(LinkResolutionResult) 110 111 isRouter bool 112 113 timer timer 114 } 115 } 116 117 // newNeighborEntry creates a neighbor cache entry starting at the default 118 // state, Unknown. Transition out of Unknown by calling either 119 // `handlePacketQueuedLocked` or `handleProbeLocked` on the newly created 120 // neighborEntry. 121 func newNeighborEntry(cache *neighborCache, remoteAddr tcpip.Address, nudState *NUDState) *neighborEntry { 122 n := &neighborEntry{ 123 cache: cache, 124 nudState: nudState, 125 } 126 n.mu.Lock() 127 n.mu.neigh = NeighborEntry{ 128 Addr: remoteAddr, 129 State: Unknown, 130 } 131 n.mu.Unlock() 132 return n 133 134 } 135 136 // newStaticNeighborEntry creates a neighbor cache entry starting at the 137 // Static state. The entry can only transition out of Static by directly 138 // calling `setStateLocked`. 139 func newStaticNeighborEntry(cache *neighborCache, addr tcpip.Address, linkAddr tcpip.LinkAddress, state *NUDState) *neighborEntry { 140 entry := NeighborEntry{ 141 Addr: addr, 142 LinkAddr: linkAddr, 143 State: Static, 144 UpdatedAt: cache.nic.stack.clock.Now(), 145 } 146 n := &neighborEntry{ 147 cache: cache, 148 nudState: state, 149 } 150 n.mu.Lock() 151 n.mu.neigh = entry 152 n.mu.Unlock() 153 return n 154 } 155 156 // notifyCompletionLocked notifies those waiting for address resolution, with 157 // the link address if resolution completed successfully. 158 // 159 // Precondition: e.mu MUST be locked. 160 func (e *neighborEntry) notifyCompletionLocked(err tcpip.Error) { 161 res := LinkResolutionResult{LinkAddress: e.mu.neigh.LinkAddr, Err: err} 162 for _, callback := range e.mu.onResolve { 163 callback(res) 164 } 165 e.mu.onResolve = nil 166 if ch := e.mu.done; ch != nil { 167 close(ch) 168 e.mu.done = nil 169 // Dequeue the pending packets asynchronously to not hold up the current 170 // goroutine as writing packets may be a costly operation. 171 // 172 // At the time of writing, when writing packets, a neighbor's link address 173 // is resolved (which ends up obtaining the entry's lock) while holding the 174 // link resolution queue's lock. Dequeuing packets asynchronously avoids a 175 // lock ordering violation. 176 // 177 // NB: this is equivalent to spawning a goroutine directly using the go 178 // keyword but allows tests that use manual clocks to deterministically 179 // wait for this work to complete. 180 e.cache.nic.stack.clock.AfterFunc(0, func() { 181 e.cache.nic.linkResQueue.dequeue(ch, e.mu.neigh.LinkAddr, err) 182 }) 183 } 184 } 185 186 // dispatchAddEventLocked signals to stack's NUD Dispatcher that the entry has 187 // been added. 188 // 189 // Precondition: e.mu MUST be locked. 190 func (e *neighborEntry) dispatchAddEventLocked() { 191 if nudDisp := e.cache.nic.stack.nudDisp; nudDisp != nil { 192 nudDisp.OnNeighborAdded(e.cache.nic.id, e.mu.neigh) 193 } 194 } 195 196 // dispatchChangeEventLocked signals to stack's NUD Dispatcher that the entry 197 // has changed state or link-layer address. 198 // 199 // Precondition: e.mu MUST be locked. 200 func (e *neighborEntry) dispatchChangeEventLocked() { 201 if nudDisp := e.cache.nic.stack.nudDisp; nudDisp != nil { 202 nudDisp.OnNeighborChanged(e.cache.nic.id, e.mu.neigh) 203 } 204 } 205 206 // dispatchRemoveEventLocked signals to stack's NUD Dispatcher that the entry 207 // has been removed. 208 // 209 // Precondition: e.mu MUST be locked. 210 func (e *neighborEntry) dispatchRemoveEventLocked() { 211 if nudDisp := e.cache.nic.stack.nudDisp; nudDisp != nil { 212 nudDisp.OnNeighborRemoved(e.cache.nic.id, e.mu.neigh) 213 } 214 } 215 216 // cancelTimerLocked cancels the currently scheduled action, if there is one. 217 // Entries in Unknown, Stale, or Static state do not have a scheduled action. 218 // 219 // Precondition: e.mu MUST be locked. 220 func (e *neighborEntry) cancelTimerLocked() { 221 if e.mu.timer.timer != nil { 222 e.mu.timer.timer.Stop() 223 *e.mu.timer.done = true 224 225 e.mu.timer = timer{} 226 } 227 } 228 229 // removeLocked prepares the entry for removal. 230 // 231 // Precondition: e.mu MUST be locked. 232 func (e *neighborEntry) removeLocked() { 233 e.mu.neigh.UpdatedAt = e.cache.nic.stack.clock.Now() 234 e.dispatchRemoveEventLocked() 235 e.cancelTimerLocked() 236 // TODO(https://github.com/SagerNet/issues/5583): test the case where this function is 237 // called during resolution; that can happen in at least these scenarios: 238 // 239 // - manual address removal during resolution 240 // 241 // - neighbor cache eviction during resolution 242 e.notifyCompletionLocked(&tcpip.ErrAborted{}) 243 } 244 245 // setStateLocked transitions the entry to the specified state immediately. 246 // 247 // Follows the logic defined in RFC 4861 section 7.3.3. 248 // 249 // Precondition: e.mu MUST be locked. 250 func (e *neighborEntry) setStateLocked(next NeighborState) { 251 e.cancelTimerLocked() 252 253 prev := e.mu.neigh.State 254 e.mu.neigh.State = next 255 e.mu.neigh.UpdatedAt = e.cache.nic.stack.clock.Now() 256 config := e.nudState.Config() 257 258 switch next { 259 case Incomplete: 260 panic(fmt.Sprintf("should never transition to Incomplete with setStateLocked; neigh = %#v, prev state = %s", e.mu.neigh, prev)) 261 262 case Reachable: 263 // Protected by e.mu. 264 done := false 265 266 e.mu.timer = timer{ 267 done: &done, 268 timer: e.cache.nic.stack.Clock().AfterFunc(e.nudState.ReachableTime(), func() { 269 e.mu.Lock() 270 defer e.mu.Unlock() 271 272 if done { 273 // The timer was stopped because the entry changed state. 274 return 275 } 276 277 e.setStateLocked(Stale) 278 e.dispatchChangeEventLocked() 279 }), 280 } 281 282 case Delay: 283 // Protected by e.mu. 284 done := false 285 286 e.mu.timer = timer{ 287 done: &done, 288 timer: e.cache.nic.stack.Clock().AfterFunc(config.DelayFirstProbeTime, func() { 289 e.mu.Lock() 290 defer e.mu.Unlock() 291 292 if done { 293 // The timer was stopped because the entry changed state. 294 return 295 } 296 297 e.setStateLocked(Probe) 298 e.dispatchChangeEventLocked() 299 }), 300 } 301 302 case Probe: 303 // Protected by e.mu. 304 done := false 305 306 remaining := config.MaxUnicastProbes 307 addr := e.mu.neigh.Addr 308 linkAddr := e.mu.neigh.LinkAddr 309 310 // Send a probe in another gorountine to free this thread of execution 311 // for finishing the state transition. This is necessary to escape the 312 // currently held lock so we can send the probe message without holding 313 // a shared lock. 314 e.mu.timer = timer{ 315 done: &done, 316 timer: e.cache.nic.stack.Clock().AfterFunc(immediateDuration, func() { 317 var err tcpip.Error = &tcpip.ErrTimeout{} 318 if remaining != 0 { 319 err = e.cache.linkRes.LinkAddressRequest(addr, "" /* localAddr */, linkAddr) 320 } 321 322 e.mu.Lock() 323 defer e.mu.Unlock() 324 325 if done { 326 // The timer was stopped because the entry changed state. 327 return 328 } 329 330 if err != nil { 331 e.setStateLocked(Unreachable) 332 e.notifyCompletionLocked(err) 333 e.dispatchChangeEventLocked() 334 return 335 } 336 337 remaining-- 338 e.mu.timer.timer.Reset(config.RetransmitTimer) 339 }), 340 } 341 342 case Unreachable: 343 344 case Unknown, Stale, Static: 345 // Do nothing 346 347 default: 348 panic(fmt.Sprintf("Invalid state transition from %q to %q", prev, next)) 349 } 350 } 351 352 // handlePacketQueuedLocked advances the state machine according to a packet 353 // being queued for outgoing transmission. 354 // 355 // Follows the logic defined in RFC 4861 section 7.3.3. 356 // 357 // Precondition: e.mu MUST be locked. 358 func (e *neighborEntry) handlePacketQueuedLocked(localAddr tcpip.Address) { 359 switch e.mu.neigh.State { 360 case Unknown, Unreachable: 361 prev := e.mu.neigh.State 362 e.mu.neigh.State = Incomplete 363 e.mu.neigh.UpdatedAt = e.cache.nic.stack.clock.Now() 364 365 switch prev { 366 case Unknown: 367 e.dispatchAddEventLocked() 368 case Unreachable: 369 e.dispatchChangeEventLocked() 370 e.cache.nic.stats.neighbor.unreachableEntryLookups.Increment() 371 } 372 373 config := e.nudState.Config() 374 375 // Protected by e.mu. 376 done := false 377 378 remaining := config.MaxMulticastProbes 379 addr := e.mu.neigh.Addr 380 381 // Send a probe in another gorountine to free this thread of execution 382 // for finishing the state transition. This is necessary to escape the 383 // currently held lock so we can send the probe message without holding 384 // a shared lock. 385 e.mu.timer = timer{ 386 done: &done, 387 timer: e.cache.nic.stack.Clock().AfterFunc(immediateDuration, func() { 388 var err tcpip.Error = &tcpip.ErrTimeout{} 389 if remaining != 0 { 390 // As per RFC 4861 section 7.2.2: 391 // 392 // If the source address of the packet prompting the solicitation is 393 // the same as one of the addresses assigned to the outgoing interface, 394 // that address SHOULD be placed in the IP Source Address of the 395 // outgoing solicitation. 396 // 397 err = e.cache.linkRes.LinkAddressRequest(addr, localAddr, "" /* linkAddr */) 398 } 399 400 e.mu.Lock() 401 defer e.mu.Unlock() 402 403 if done { 404 // The timer was stopped because the entry changed state. 405 return 406 } 407 408 if err != nil { 409 e.setStateLocked(Unreachable) 410 e.notifyCompletionLocked(err) 411 e.dispatchChangeEventLocked() 412 return 413 } 414 415 remaining-- 416 e.mu.timer.timer.Reset(config.RetransmitTimer) 417 }), 418 } 419 420 case Stale: 421 e.setStateLocked(Delay) 422 e.dispatchChangeEventLocked() 423 424 case Incomplete, Reachable, Delay, Probe, Static: 425 // Do nothing 426 default: 427 panic(fmt.Sprintf("Invalid cache entry state: %s", e.mu.neigh.State)) 428 } 429 } 430 431 // handleProbeLocked processes an incoming neighbor probe (e.g. ARP request or 432 // Neighbor Solicitation for ARP or NDP, respectively). 433 // 434 // Follows the logic defined in RFC 4861 section 7.2.3. 435 // 436 // Precondition: e.mu MUST be locked. 437 func (e *neighborEntry) handleProbeLocked(remoteLinkAddr tcpip.LinkAddress) { 438 // Probes MUST be silently discarded if the target address is tentative, does 439 // not exist, or not bound to the NIC as per RFC 4861 section 7.2.3. These 440 // checks MUST be done by the NetworkEndpoint. 441 442 switch e.mu.neigh.State { 443 case Unknown: 444 e.mu.neigh.LinkAddr = remoteLinkAddr 445 e.setStateLocked(Stale) 446 e.dispatchAddEventLocked() 447 448 case Incomplete: 449 // "If an entry already exists, and the cached link-layer address 450 // differs from the one in the received Source Link-Layer option, the 451 // cached address should be replaced by the received address, and the 452 // entry's reachability state MUST be set to STALE." 453 // - RFC 4861 section 7.2.3 454 e.mu.neigh.LinkAddr = remoteLinkAddr 455 e.setStateLocked(Stale) 456 e.notifyCompletionLocked(nil) 457 e.dispatchChangeEventLocked() 458 459 case Reachable, Delay, Probe: 460 if e.mu.neigh.LinkAddr != remoteLinkAddr { 461 e.mu.neigh.LinkAddr = remoteLinkAddr 462 e.setStateLocked(Stale) 463 e.dispatchChangeEventLocked() 464 } 465 466 case Stale: 467 if e.mu.neigh.LinkAddr != remoteLinkAddr { 468 e.mu.neigh.LinkAddr = remoteLinkAddr 469 e.dispatchChangeEventLocked() 470 } 471 472 case Unreachable: 473 // TODO(github.com/SagerNet/issue/5472): Do not change the entry if the link 474 // address is the same, as per RFC 7048. 475 e.mu.neigh.LinkAddr = remoteLinkAddr 476 e.setStateLocked(Stale) 477 e.dispatchChangeEventLocked() 478 479 case Static: 480 // Do nothing 481 482 default: 483 panic(fmt.Sprintf("Invalid cache entry state: %s", e.mu.neigh.State)) 484 } 485 } 486 487 // handleConfirmationLocked processes an incoming neighbor confirmation 488 // (e.g. ARP reply or Neighbor Advertisement for ARP or NDP, respectively). 489 // 490 // Follows the state machine defined by RFC 4861 section 7.2.5. 491 // 492 // TODO(github.com/SagerNet/issue/2277): To protect against ARP poisoning and other 493 // attacks against NDP functions, Secure Neighbor Discovery (SEND) Protocol 494 // should be deployed where preventing access to the broadcast segment might 495 // not be possible. SEND uses RSA key pairs to produce Cryptographically 496 // Generated Addresses (CGA), as defined in RFC 3972. This ensures that the 497 // claimed source of an NDP message is the owner of the claimed address. 498 // 499 // Precondition: e.mu MUST be locked. 500 func (e *neighborEntry) handleConfirmationLocked(linkAddr tcpip.LinkAddress, flags ReachabilityConfirmationFlags) { 501 switch e.mu.neigh.State { 502 case Incomplete: 503 if len(linkAddr) == 0 { 504 // "If the link layer has addresses and no Target Link-Layer Address 505 // option is included, the receiving node SHOULD silently discard the 506 // received advertisement." - RFC 4861 section 7.2.5 507 break 508 } 509 510 e.mu.neigh.LinkAddr = linkAddr 511 if flags.Solicited { 512 e.setStateLocked(Reachable) 513 } else { 514 e.setStateLocked(Stale) 515 } 516 e.dispatchChangeEventLocked() 517 e.mu.isRouter = flags.IsRouter 518 e.notifyCompletionLocked(nil) 519 520 // "Note that the Override flag is ignored if the entry is in the 521 // INCOMPLETE state." - RFC 4861 section 7.2.5 522 523 case Reachable, Stale, Delay, Probe: 524 isLinkAddrDifferent := len(linkAddr) != 0 && e.mu.neigh.LinkAddr != linkAddr 525 526 if isLinkAddrDifferent { 527 if !flags.Override { 528 if e.mu.neigh.State == Reachable { 529 e.setStateLocked(Stale) 530 e.dispatchChangeEventLocked() 531 } 532 break 533 } 534 535 e.mu.neigh.LinkAddr = linkAddr 536 537 if !flags.Solicited { 538 if e.mu.neigh.State != Stale { 539 e.setStateLocked(Stale) 540 e.dispatchChangeEventLocked() 541 } else { 542 // Notify the LinkAddr change, even though NUD state hasn't changed. 543 e.dispatchChangeEventLocked() 544 } 545 break 546 } 547 } 548 549 if flags.Solicited && (flags.Override || !isLinkAddrDifferent) { 550 wasReachable := e.mu.neigh.State == Reachable 551 // Set state to Reachable again to refresh timers. 552 e.setStateLocked(Reachable) 553 e.notifyCompletionLocked(nil) 554 if !wasReachable { 555 e.dispatchChangeEventLocked() 556 } 557 } 558 559 if e.mu.isRouter && !flags.IsRouter && header.IsV6UnicastAddress(e.mu.neigh.Addr) { 560 // "In those cases where the IsRouter flag changes from TRUE to FALSE as 561 // a result of this update, the node MUST remove that router from the 562 // Default Router List and update the Destination Cache entries for all 563 // destinations using that neighbor as a router as specified in Section 564 // 7.3.3. This is needed to detect when a node that is used as a router 565 // stops forwarding packets due to being configured as a host." 566 // - RFC 4861 section 7.2.5 567 // 568 // TODO(github.com/SagerNet/issue/4085): Remove the special casing we do for IPv6 569 // here. 570 ep, ok := e.cache.nic.networkEndpoints[header.IPv6ProtocolNumber] 571 if !ok { 572 panic(fmt.Sprintf("have a neighbor entry for an IPv6 router but no IPv6 network endpoint")) 573 } 574 575 if ndpEP, ok := ep.(NDPEndpoint); ok { 576 ndpEP.InvalidateDefaultRouter(e.mu.neigh.Addr) 577 } 578 } 579 e.mu.isRouter = flags.IsRouter 580 581 case Unknown, Unreachable, Static: 582 // Do nothing 583 584 default: 585 panic(fmt.Sprintf("Invalid cache entry state: %s", e.mu.neigh.State)) 586 } 587 } 588 589 // handleUpperLevelConfirmationLocked processes an incoming upper-level protocol 590 // (e.g. TCP acknowledgements) reachability confirmation. 591 // 592 // Precondition: e.mu MUST be locked. 593 func (e *neighborEntry) handleUpperLevelConfirmationLocked() { 594 switch e.mu.neigh.State { 595 case Reachable, Stale, Delay, Probe: 596 wasReachable := e.mu.neigh.State == Reachable 597 // Set state to Reachable again to refresh timers. 598 e.setStateLocked(Reachable) 599 if !wasReachable { 600 e.dispatchChangeEventLocked() 601 } 602 603 case Unknown, Incomplete, Unreachable, Static: 604 // Do nothing 605 606 default: 607 panic(fmt.Sprintf("Invalid cache entry state: %s", e.mu.neigh.State)) 608 } 609 }