github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/tcpip/stack/stack.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package stack provides the glue between networking protocols and the 16 // consumers of the networking stack. 17 // 18 // For consumers, the only function of interest is New(), everything else is 19 // provided by the tcpip/public package. 20 package stack 21 22 import ( 23 "encoding/binary" 24 "fmt" 25 "io" 26 "math/rand" 27 "sync/atomic" 28 "time" 29 30 "golang.org/x/time/rate" 31 "github.com/metacubex/gvisor/pkg/atomicbitops" 32 "github.com/metacubex/gvisor/pkg/buffer" 33 "github.com/metacubex/gvisor/pkg/log" 34 cryptorand "github.com/metacubex/gvisor/pkg/rand" 35 "github.com/metacubex/gvisor/pkg/tcpip" 36 "github.com/metacubex/gvisor/pkg/tcpip/header" 37 "github.com/metacubex/gvisor/pkg/tcpip/ports" 38 "github.com/metacubex/gvisor/pkg/waiter" 39 ) 40 41 const ( 42 // DefaultTOS is the default type of service value for network endpoints. 43 DefaultTOS = 0 44 ) 45 46 type transportProtocolState struct { 47 proto TransportProtocol 48 defaultHandler func(id TransportEndpointID, pkt *PacketBuffer) bool 49 } 50 51 // RestoredEndpoint is an endpoint that needs to be restored. 52 type RestoredEndpoint interface { 53 // Restore restores an endpoint. This can be used to restart background 54 // workers such as protocol goroutines. 
This must be called after all 55 // indirect dependencies of the endpoint has been restored, which 56 // generally implies at the end of the restore process. 57 Restore(*Stack) 58 } 59 60 // ResumableEndpoint is an endpoint that needs to be resumed after save. 61 type ResumableEndpoint interface { 62 // Resume resumes an endpoint. 63 Resume() 64 } 65 66 // uniqueIDGenerator is a default unique ID generator. 67 type uniqueIDGenerator atomicbitops.Uint64 68 69 func (u *uniqueIDGenerator) UniqueID() uint64 { 70 return ((*atomicbitops.Uint64)(u)).Add(1) 71 } 72 73 var netRawMissingLogger = log.BasicRateLimitedLogger(time.Minute) 74 75 // Stack is a networking stack, with all supported protocols, NICs, and route 76 // table. 77 // 78 // LOCK ORDERING: mu > routeMu. 79 type Stack struct { 80 transportProtocols map[tcpip.TransportProtocolNumber]*transportProtocolState 81 networkProtocols map[tcpip.NetworkProtocolNumber]NetworkProtocol 82 83 // rawFactory creates raw endpoints. If nil, raw endpoints are 84 // disabled. It is set during Stack creation and is immutable. 85 rawFactory RawFactory 86 packetEndpointWriteSupported bool 87 88 demux *transportDemuxer 89 90 stats tcpip.Stats 91 92 // routeMu protects annotated fields below. 93 routeMu routeStackRWMutex 94 95 // +checklocks:routeMu 96 routeTable []tcpip.Route 97 98 mu stackRWMutex 99 // +checklocks:mu 100 nics map[tcpip.NICID]*nic 101 defaultForwardingEnabled map[tcpip.NetworkProtocolNumber]struct{} 102 103 // cleanupEndpointsMu protects cleanupEndpoints. 104 cleanupEndpointsMu cleanupEndpointsMutex 105 // +checklocks:cleanupEndpointsMu 106 cleanupEndpoints map[TransportEndpoint]struct{} 107 108 *ports.PortManager 109 110 // If not nil, then any new endpoints will have this probe function 111 // invoked everytime they receive a TCP segment. 112 tcpProbeFunc atomic.Value // TCPProbeFunc 113 114 // clock is used to generate user-visible times. 
115 clock tcpip.Clock 116 117 // handleLocal allows non-loopback interfaces to loop packets. 118 handleLocal bool 119 120 // tables are the iptables packet filtering and manipulation rules. 121 // TODO(gvisor.dev/issue/4595): S/R this field. 122 tables *IPTables 123 124 // restoredEndpoints is a list of endpoints that need to be restored if the 125 // stack is being restored. 126 restoredEndpoints []RestoredEndpoint 127 128 // resumableEndpoints is a list of endpoints that need to be resumed 129 // after save. 130 resumableEndpoints []ResumableEndpoint 131 132 // icmpRateLimiter is a global rate limiter for all ICMP messages generated 133 // by the stack. 134 icmpRateLimiter *ICMPRateLimiter 135 136 // seed is a one-time random value initialized at stack startup. 137 // 138 // TODO(gvisor.dev/issue/940): S/R this field. 139 seed uint32 140 141 // nudConfigs is the default NUD configurations used by interfaces. 142 nudConfigs NUDConfigurations 143 144 // nudDisp is the NUD event dispatcher that is used to send the netstack 145 // integrator NUD related events. 146 nudDisp NUDDispatcher 147 148 // uniqueIDGenerator is a generator of unique identifiers. 149 uniqueIDGenerator UniqueID 150 151 // randomGenerator is an injectable pseudo random generator that can be 152 // used when a random number is required. It must not be used in 153 // security-sensitive contexts. 154 insecureRNG *rand.Rand 155 156 // secureRNG is a cryptographically secure random number generator. 157 secureRNG cryptorand.RNG 158 159 // sendBufferSize holds the min/default/max send buffer sizes for 160 // endpoints other than TCP. 161 sendBufferSize tcpip.SendBufferSizeOption 162 163 // receiveBufferSize holds the min/default/max receive buffer sizes for 164 // endpoints other than TCP. 
165 receiveBufferSize tcpip.ReceiveBufferSizeOption 166 167 // tcpInvalidRateLimit is the maximal rate for sending duplicate 168 // acknowledgements in response to incoming TCP packets that are for an existing 169 // connection but that are invalid due to any of the following reasons: 170 // 171 // a) out-of-window sequence number. 172 // b) out-of-window acknowledgement number. 173 // c) PAWS check failure (when implemented). 174 // 175 // This is required to prevent potential ACK loops. 176 // Setting this to 0 will disable all rate limiting. 177 tcpInvalidRateLimit time.Duration 178 179 // tsOffsetSecret is the secret key for generating timestamp offsets 180 // initialized at stack startup. 181 tsOffsetSecret uint32 182 } 183 184 // UniqueID is an abstract generator of unique identifiers. 185 type UniqueID interface { 186 UniqueID() uint64 187 } 188 189 // NetworkProtocolFactory instantiates a network protocol. 190 // 191 // NetworkProtocolFactory must not attempt to modify the stack, it may only 192 // query the stack. 193 type NetworkProtocolFactory func(*Stack) NetworkProtocol 194 195 // TransportProtocolFactory instantiates a transport protocol. 196 // 197 // TransportProtocolFactory must not attempt to modify the stack, it may only 198 // query the stack. 199 type TransportProtocolFactory func(*Stack) TransportProtocol 200 201 // Options contains optional Stack configuration. 202 type Options struct { 203 // NetworkProtocols lists the network protocols to enable. 204 NetworkProtocols []NetworkProtocolFactory 205 206 // TransportProtocols lists the transport protocols to enable. 207 TransportProtocols []TransportProtocolFactory 208 209 // Clock is an optional clock used for timekeeping. 210 // 211 // If Clock is nil, tcpip.NewStdClock() will be used. 212 Clock tcpip.Clock 213 214 // Stats are optional statistic counters. 
215 Stats tcpip.Stats 216 217 // HandleLocal indicates whether packets destined to their source 218 // should be handled by the stack internally (true) or outside the 219 // stack (false). 220 HandleLocal bool 221 222 // UniqueID is an optional generator of unique identifiers. 223 UniqueID UniqueID 224 225 // NUDConfigs is the default NUD configurations used by interfaces. 226 NUDConfigs NUDConfigurations 227 228 // NUDDisp is the NUD event dispatcher that an integrator can provide to 229 // receive NUD related events. 230 NUDDisp NUDDispatcher 231 232 // RawFactory produces raw endpoints. Raw endpoints are enabled only if 233 // this is non-nil. 234 RawFactory RawFactory 235 236 // AllowPacketEndpointWrite determines if packet endpoints support write 237 // operations. 238 AllowPacketEndpointWrite bool 239 240 // RandSource is an optional source to use to generate random 241 // numbers. If omitted it defaults to a Source seeded by the data 242 // returned by the stack secure RNG. 243 // 244 // RandSource must be thread-safe. 245 RandSource rand.Source 246 247 // IPTables are the initial iptables rules. If nil, DefaultIPTables will be 248 // used to construct the initial iptables rules. 249 // all traffic. 250 IPTables *IPTables 251 252 // DefaultIPTables is an optional iptables rules constructor that is called 253 // if IPTables is nil. If both fields are nil, iptables will allow all 254 // traffic. 255 DefaultIPTables func(clock tcpip.Clock, rand *rand.Rand) *IPTables 256 257 // SecureRNG is a cryptographically secure random number generator. 258 SecureRNG io.Reader 259 } 260 261 // TransportEndpointInfo holds useful information about a transport endpoint 262 // which can be queried by monitoring tools. 263 // 264 // +stateify savable 265 type TransportEndpointInfo struct { 266 // The following fields are initialized at creation time and are 267 // immutable. 
268 269 NetProto tcpip.NetworkProtocolNumber 270 TransProto tcpip.TransportProtocolNumber 271 272 // The following fields are protected by endpoint mu. 273 274 ID TransportEndpointID 275 // BindNICID and bindAddr are set via calls to Bind(). They are used to 276 // reject attempts to send data or connect via a different NIC or 277 // address 278 BindNICID tcpip.NICID 279 BindAddr tcpip.Address 280 // RegisterNICID is the default NICID registered as a side-effect of 281 // connect or datagram write. 282 RegisterNICID tcpip.NICID 283 } 284 285 // AddrNetProtoLocked unwraps the specified address if it is a V4-mapped V6 286 // address and returns the network protocol number to be used to communicate 287 // with the specified address. It returns an error if the passed address is 288 // incompatible with the receiver. 289 // 290 // Preconditon: the parent endpoint mu must be held while calling this method. 291 func (t *TransportEndpointInfo) AddrNetProtoLocked(addr tcpip.FullAddress, v6only bool) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, tcpip.Error) { 292 netProto := t.NetProto 293 switch addr.Addr.BitLen() { 294 case header.IPv4AddressSizeBits: 295 netProto = header.IPv4ProtocolNumber 296 case header.IPv6AddressSizeBits: 297 if header.IsV4MappedAddress(addr.Addr) { 298 netProto = header.IPv4ProtocolNumber 299 addr.Addr = tcpip.AddrFrom4Slice(addr.Addr.AsSlice()[header.IPv6AddressSize-header.IPv4AddressSize:]) 300 if addr.Addr == header.IPv4Any { 301 addr.Addr = tcpip.Address{} 302 } 303 } 304 } 305 306 switch t.ID.LocalAddress.BitLen() { 307 case header.IPv4AddressSizeBits: 308 if addr.Addr.BitLen() == header.IPv6AddressSizeBits { 309 return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{} 310 } 311 case header.IPv6AddressSizeBits: 312 if addr.Addr.BitLen() == header.IPv4AddressSizeBits { 313 return tcpip.FullAddress{}, 0, &tcpip.ErrNetworkUnreachable{} 314 } 315 } 316 317 switch { 318 case netProto == t.NetProto: 319 case netProto == 
header.IPv4ProtocolNumber && t.NetProto == header.IPv6ProtocolNumber: 320 if v6only { 321 return tcpip.FullAddress{}, 0, &tcpip.ErrHostUnreachable{} 322 } 323 default: 324 return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{} 325 } 326 327 return addr, netProto, nil 328 } 329 330 // IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo 331 // marker interface. 332 func (*TransportEndpointInfo) IsEndpointInfo() {} 333 334 // New allocates a new networking stack with only the requested networking and 335 // transport protocols configured with default options. 336 // 337 // Note, NDPConfigurations will be fixed before being used by the Stack. That 338 // is, if an invalid value was provided, it will be reset to the default value. 339 // 340 // Protocol options can be changed by calling the 341 // SetNetworkProtocolOption/SetTransportProtocolOption methods provided by the 342 // stack. Please refer to individual protocol implementations as to what options 343 // are supported. 344 func New(opts Options) *Stack { 345 clock := opts.Clock 346 if clock == nil { 347 clock = tcpip.NewStdClock() 348 } 349 350 if opts.UniqueID == nil { 351 opts.UniqueID = new(uniqueIDGenerator) 352 } 353 354 if opts.SecureRNG == nil { 355 opts.SecureRNG = cryptorand.Reader 356 } 357 secureRNG := cryptorand.RNGFrom(opts.SecureRNG) 358 359 randSrc := opts.RandSource 360 if randSrc == nil { 361 var v int64 362 if err := binary.Read(opts.SecureRNG, binary.LittleEndian, &v); err != nil { 363 panic(err) 364 } 365 // Source provided by rand.NewSource is not thread-safe so 366 // we wrap it in a simple thread-safe version. 
367 randSrc = &lockedRandomSource{src: rand.NewSource(v)} 368 } 369 insecureRNG := rand.New(randSrc) 370 371 if opts.IPTables == nil { 372 if opts.DefaultIPTables == nil { 373 opts.DefaultIPTables = DefaultTables 374 } 375 opts.IPTables = opts.DefaultIPTables(clock, insecureRNG) 376 } 377 378 opts.NUDConfigs.resetInvalidFields() 379 380 s := &Stack{ 381 transportProtocols: make(map[tcpip.TransportProtocolNumber]*transportProtocolState), 382 networkProtocols: make(map[tcpip.NetworkProtocolNumber]NetworkProtocol), 383 nics: make(map[tcpip.NICID]*nic), 384 packetEndpointWriteSupported: opts.AllowPacketEndpointWrite, 385 defaultForwardingEnabled: make(map[tcpip.NetworkProtocolNumber]struct{}), 386 cleanupEndpoints: make(map[TransportEndpoint]struct{}), 387 PortManager: ports.NewPortManager(), 388 clock: clock, 389 stats: opts.Stats.FillIn(), 390 handleLocal: opts.HandleLocal, 391 tables: opts.IPTables, 392 icmpRateLimiter: NewICMPRateLimiter(clock), 393 seed: secureRNG.Uint32(), 394 nudConfigs: opts.NUDConfigs, 395 uniqueIDGenerator: opts.UniqueID, 396 nudDisp: opts.NUDDisp, 397 insecureRNG: insecureRNG, 398 secureRNG: secureRNG, 399 sendBufferSize: tcpip.SendBufferSizeOption{ 400 Min: MinBufferSize, 401 Default: DefaultBufferSize, 402 Max: DefaultMaxBufferSize, 403 }, 404 receiveBufferSize: tcpip.ReceiveBufferSizeOption{ 405 Min: MinBufferSize, 406 Default: DefaultBufferSize, 407 Max: DefaultMaxBufferSize, 408 }, 409 tcpInvalidRateLimit: defaultTCPInvalidRateLimit, 410 tsOffsetSecret: secureRNG.Uint32(), 411 } 412 413 // Add specified network protocols. 414 for _, netProtoFactory := range opts.NetworkProtocols { 415 netProto := netProtoFactory(s) 416 s.networkProtocols[netProto.Number()] = netProto 417 } 418 419 // Add specified transport protocols. 
420 for _, transProtoFactory := range opts.TransportProtocols { 421 transProto := transProtoFactory(s) 422 s.transportProtocols[transProto.Number()] = &transportProtocolState{ 423 proto: transProto, 424 } 425 } 426 427 // Add the factory for raw endpoints, if present. 428 s.rawFactory = opts.RawFactory 429 430 // Create the global transport demuxer. 431 s.demux = newTransportDemuxer(s) 432 433 return s 434 } 435 436 // UniqueID returns a unique identifier. 437 func (s *Stack) UniqueID() uint64 { 438 return s.uniqueIDGenerator.UniqueID() 439 } 440 441 // SetNetworkProtocolOption allows configuring individual protocol level 442 // options. This method returns an error if the protocol is not supported or 443 // option is not supported by the protocol implementation or the provided value 444 // is incorrect. 445 func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.SettableNetworkProtocolOption) tcpip.Error { 446 netProto, ok := s.networkProtocols[network] 447 if !ok { 448 return &tcpip.ErrUnknownProtocol{} 449 } 450 return netProto.SetOption(option) 451 } 452 453 // NetworkProtocolOption allows retrieving individual protocol level option 454 // values. This method returns an error if the protocol is not supported or 455 // option is not supported by the protocol implementation. E.g.: 456 // 457 // var v ipv4.MyOption 458 // err := s.NetworkProtocolOption(tcpip.IPv4ProtocolNumber, &v) 459 // if err != nil { 460 // ... 461 // } 462 func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.GettableNetworkProtocolOption) tcpip.Error { 463 netProto, ok := s.networkProtocols[network] 464 if !ok { 465 return &tcpip.ErrUnknownProtocol{} 466 } 467 return netProto.Option(option) 468 } 469 470 // SetTransportProtocolOption allows configuring individual protocol level 471 // options. 
This method returns an error if the protocol is not supported or 472 // option is not supported by the protocol implementation or the provided value 473 // is incorrect. 474 func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.SettableTransportProtocolOption) tcpip.Error { 475 transProtoState, ok := s.transportProtocols[transport] 476 if !ok { 477 return &tcpip.ErrUnknownProtocol{} 478 } 479 return transProtoState.proto.SetOption(option) 480 } 481 482 // TransportProtocolOption allows retrieving individual protocol level option 483 // values. This method returns an error if the protocol is not supported or 484 // option is not supported by the protocol implementation. 485 // 486 // var v tcp.SACKEnabled 487 // if err := s.TransportProtocolOption(tcpip.TCPProtocolNumber, &v); err != nil { 488 // ... 489 // } 490 func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.GettableTransportProtocolOption) tcpip.Error { 491 transProtoState, ok := s.transportProtocols[transport] 492 if !ok { 493 return &tcpip.ErrUnknownProtocol{} 494 } 495 return transProtoState.proto.Option(option) 496 } 497 498 // SendBufSizeProto is a protocol that can return its send buffer size. 499 type SendBufSizeProto interface { 500 SendBufferSize() tcpip.TCPSendBufferSizeRangeOption 501 } 502 503 // TCPSendBufferLimits returns the TCP send buffer size limit. 504 func (s *Stack) TCPSendBufferLimits() tcpip.TCPSendBufferSizeRangeOption { 505 return s.transportProtocols[header.TCPProtocolNumber].proto.(SendBufSizeProto).SendBufferSize() 506 } 507 508 // SetTransportProtocolHandler sets the per-stack default handler for the given 509 // protocol. 510 // 511 // It must be called only during initialization of the stack. Changing it as the 512 // stack is operating is not supported. 
513 func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(TransportEndpointID, *PacketBuffer) bool) { 514 state := s.transportProtocols[p] 515 if state != nil { 516 state.defaultHandler = h 517 } 518 } 519 520 // Clock returns the Stack's clock for retrieving the current time and 521 // scheduling work. 522 func (s *Stack) Clock() tcpip.Clock { 523 return s.clock 524 } 525 526 // Stats returns a mutable copy of the current stats. 527 // 528 // This is not generally exported via the public interface, but is available 529 // internally. 530 func (s *Stack) Stats() tcpip.Stats { 531 return s.stats 532 } 533 534 // SetNICForwarding enables or disables packet forwarding on the specified NIC 535 // for the passed protocol. 536 // 537 // Returns the previous configuration on the NIC. 538 func (s *Stack) SetNICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) { 539 s.mu.RLock() 540 defer s.mu.RUnlock() 541 542 nic, ok := s.nics[id] 543 if !ok { 544 return false, &tcpip.ErrUnknownNICID{} 545 } 546 547 return nic.setForwarding(protocol, enable) 548 } 549 550 // NICForwarding returns the forwarding configuration for the specified NIC. 551 func (s *Stack) NICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) { 552 s.mu.RLock() 553 defer s.mu.RUnlock() 554 555 nic, ok := s.nics[id] 556 if !ok { 557 return false, &tcpip.ErrUnknownNICID{} 558 } 559 560 return nic.forwarding(protocol) 561 } 562 563 // SetForwardingDefaultAndAllNICs sets packet forwarding for all NICs for the 564 // passed protocol and sets the default setting for newly created NICs. 
565 func (s *Stack) SetForwardingDefaultAndAllNICs(protocol tcpip.NetworkProtocolNumber, enable bool) tcpip.Error { 566 s.mu.Lock() 567 defer s.mu.Unlock() 568 569 doneOnce := false 570 for id, nic := range s.nics { 571 if _, err := nic.setForwarding(protocol, enable); err != nil { 572 // Expect forwarding to be settable on all interfaces if it was set on 573 // one. 574 if doneOnce { 575 panic(fmt.Sprintf("nic(id=%d).setForwarding(%d, %t): %s", id, protocol, enable, err)) 576 } 577 578 return err 579 } 580 581 doneOnce = true 582 } 583 584 if enable { 585 s.defaultForwardingEnabled[protocol] = struct{}{} 586 } else { 587 delete(s.defaultForwardingEnabled, protocol) 588 } 589 590 return nil 591 } 592 593 // AddMulticastRoute adds a multicast route to be used for the specified 594 // addresses and protocol. 595 func (s *Stack) AddMulticastRoute(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination, route MulticastRoute) tcpip.Error { 596 netProto, ok := s.networkProtocols[protocol] 597 if !ok { 598 return &tcpip.ErrUnknownProtocol{} 599 } 600 601 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 602 if !ok { 603 return &tcpip.ErrNotSupported{} 604 } 605 606 return forwardingNetProto.AddMulticastRoute(addresses, route) 607 } 608 609 // RemoveMulticastRoute removes a multicast route that matches the specified 610 // addresses and protocol. 
611 func (s *Stack) RemoveMulticastRoute(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination) tcpip.Error { 612 netProto, ok := s.networkProtocols[protocol] 613 if !ok { 614 return &tcpip.ErrUnknownProtocol{} 615 } 616 617 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 618 if !ok { 619 return &tcpip.ErrNotSupported{} 620 } 621 622 return forwardingNetProto.RemoveMulticastRoute(addresses) 623 } 624 625 // MulticastRouteLastUsedTime returns a monotonic timestamp that represents the 626 // last time that the route that matches the provided addresses and protocol 627 // was used or updated. 628 func (s *Stack) MulticastRouteLastUsedTime(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination) (tcpip.MonotonicTime, tcpip.Error) { 629 netProto, ok := s.networkProtocols[protocol] 630 if !ok { 631 return tcpip.MonotonicTime{}, &tcpip.ErrUnknownProtocol{} 632 } 633 634 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 635 if !ok { 636 return tcpip.MonotonicTime{}, &tcpip.ErrNotSupported{} 637 } 638 639 return forwardingNetProto.MulticastRouteLastUsedTime(addresses) 640 } 641 642 // EnableMulticastForwardingForProtocol enables multicast forwarding for the 643 // provided protocol. 644 // 645 // Returns true if forwarding was already enabled on the protocol. 646 // Additionally, returns an error if: 647 // 648 // - The protocol is not found. 649 // - The protocol doesn't support multicast forwarding. 650 // - The multicast forwarding event dispatcher is nil. 651 // 652 // If successful, future multicast forwarding events will be sent to the 653 // provided event dispatcher. 
654 func (s *Stack) EnableMulticastForwardingForProtocol(protocol tcpip.NetworkProtocolNumber, disp MulticastForwardingEventDispatcher) (bool, tcpip.Error) { 655 netProto, ok := s.networkProtocols[protocol] 656 if !ok { 657 return false, &tcpip.ErrUnknownProtocol{} 658 } 659 660 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 661 if !ok { 662 return false, &tcpip.ErrNotSupported{} 663 } 664 665 return forwardingNetProto.EnableMulticastForwarding(disp) 666 } 667 668 // DisableMulticastForwardingForProtocol disables multicast forwarding for the 669 // provided protocol. 670 // 671 // Returns an error if the provided protocol is not found or if it does not 672 // support multicast forwarding. 673 func (s *Stack) DisableMulticastForwardingForProtocol(protocol tcpip.NetworkProtocolNumber) tcpip.Error { 674 netProto, ok := s.networkProtocols[protocol] 675 if !ok { 676 return &tcpip.ErrUnknownProtocol{} 677 } 678 679 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 680 if !ok { 681 return &tcpip.ErrNotSupported{} 682 } 683 684 forwardingNetProto.DisableMulticastForwarding() 685 return nil 686 } 687 688 // SetNICMulticastForwarding enables or disables multicast packet forwarding on 689 // the specified NIC for the passed protocol. 690 // 691 // Returns the previous configuration on the NIC. 692 // 693 // TODO(https://gvisor.dev/issue/7338): Implement support for multicast 694 // forwarding. Currently, setting this value is a no-op and is not ready for 695 // use. 696 func (s *Stack) SetNICMulticastForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) { 697 s.mu.RLock() 698 defer s.mu.RUnlock() 699 700 nic, ok := s.nics[id] 701 if !ok { 702 return false, &tcpip.ErrUnknownNICID{} 703 } 704 705 return nic.setMulticastForwarding(protocol, enable) 706 } 707 708 // NICMulticastForwarding returns the multicast forwarding configuration for 709 // the specified NIC. 
710 func (s *Stack) NICMulticastForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) { 711 s.mu.RLock() 712 defer s.mu.RUnlock() 713 714 nic, ok := s.nics[id] 715 if !ok { 716 return false, &tcpip.ErrUnknownNICID{} 717 } 718 719 return nic.multicastForwarding(protocol) 720 } 721 722 // PortRange returns the UDP and TCP inclusive range of ephemeral ports used in 723 // both IPv4 and IPv6. 724 func (s *Stack) PortRange() (uint16, uint16) { 725 return s.PortManager.PortRange() 726 } 727 728 // SetPortRange sets the UDP and TCP IPv4 and IPv6 ephemeral port range 729 // (inclusive). 730 func (s *Stack) SetPortRange(start uint16, end uint16) tcpip.Error { 731 return s.PortManager.SetPortRange(start, end) 732 } 733 734 // GROTimeout returns the GRO timeout. 735 func (s *Stack) GROTimeout(nicID tcpip.NICID) (time.Duration, tcpip.Error) { 736 s.mu.RLock() 737 defer s.mu.RUnlock() 738 739 nic, ok := s.nics[nicID] 740 if !ok { 741 return 0, &tcpip.ErrUnknownNICID{} 742 } 743 744 return nic.gro.getInterval(), nil 745 } 746 747 // SetGROTimeout sets the GRO timeout. 748 func (s *Stack) SetGROTimeout(nicID tcpip.NICID, timeout time.Duration) tcpip.Error { 749 s.mu.RLock() 750 defer s.mu.RUnlock() 751 752 nic, ok := s.nics[nicID] 753 if !ok { 754 return &tcpip.ErrUnknownNICID{} 755 } 756 757 nic.gro.setInterval(timeout) 758 return nil 759 } 760 761 // SetRouteTable assigns the route table to be used by this stack. It 762 // specifies which NIC to use for given destination address ranges. 763 // 764 // This method takes ownership of the table. 765 func (s *Stack) SetRouteTable(table []tcpip.Route) { 766 s.routeMu.Lock() 767 defer s.routeMu.Unlock() 768 s.routeTable = table 769 } 770 771 // GetRouteTable returns the route table which is currently in use. 772 func (s *Stack) GetRouteTable() []tcpip.Route { 773 s.routeMu.RLock() 774 defer s.routeMu.RUnlock() 775 return append([]tcpip.Route(nil), s.routeTable...) 
776 } 777 778 // AddRoute appends a route to the route table. 779 func (s *Stack) AddRoute(route tcpip.Route) { 780 s.routeMu.Lock() 781 defer s.routeMu.Unlock() 782 s.routeTable = append(s.routeTable, route) 783 } 784 785 // RemoveRoutes removes matching routes from the route table. 786 func (s *Stack) RemoveRoutes(match func(tcpip.Route) bool) { 787 s.routeMu.Lock() 788 defer s.routeMu.Unlock() 789 790 var filteredRoutes []tcpip.Route 791 for _, route := range s.routeTable { 792 if !match(route) { 793 filteredRoutes = append(filteredRoutes, route) 794 } 795 } 796 s.routeTable = filteredRoutes 797 } 798 799 // NewEndpoint creates a new transport layer endpoint of the given protocol. 800 func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { 801 t, ok := s.transportProtocols[transport] 802 if !ok { 803 return nil, &tcpip.ErrUnknownProtocol{} 804 } 805 806 return t.proto.NewEndpoint(network, waiterQueue) 807 } 808 809 // NewRawEndpoint creates a new raw transport layer endpoint of the given 810 // protocol. Raw endpoints receive all traffic for a given protocol regardless 811 // of address. 812 func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, tcpip.Error) { 813 if s.rawFactory == nil { 814 netRawMissingLogger.Infof("A process tried to create a raw socket, but --net-raw was not specified. 
Should runsc be run with --net-raw?") 815 return nil, &tcpip.ErrNotPermitted{} 816 } 817 818 if !associated { 819 return s.rawFactory.NewUnassociatedEndpoint(s, network, transport, waiterQueue) 820 } 821 822 t, ok := s.transportProtocols[transport] 823 if !ok { 824 return nil, &tcpip.ErrUnknownProtocol{} 825 } 826 827 return t.proto.NewRawEndpoint(network, waiterQueue) 828 } 829 830 // NewPacketEndpoint creates a new packet endpoint listening for the given 831 // netProto. 832 func (s *Stack) NewPacketEndpoint(cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { 833 if s.rawFactory == nil { 834 return nil, &tcpip.ErrNotPermitted{} 835 } 836 837 return s.rawFactory.NewPacketEndpoint(s, cooked, netProto, waiterQueue) 838 } 839 840 // NICContext is an opaque pointer used to store client-supplied NIC metadata. 841 type NICContext any 842 843 // NICOptions specifies the configuration of a NIC as it is being created. 844 // The zero value creates an enabled, unnamed NIC. 845 type NICOptions struct { 846 // Name specifies the name of the NIC. 847 Name string 848 849 // Disabled specifies whether to avoid calling Attach on the passed 850 // LinkEndpoint. 851 Disabled bool 852 853 // Context specifies user-defined data that will be returned in stack.NICInfo 854 // for the NIC. Clients of this library can use it to add metadata that 855 // should be tracked alongside a NIC, to avoid having to keep a 856 // map[tcpip.NICID]metadata mirroring stack.Stack's nic map. 857 Context NICContext 858 859 // QDisc is the queue discipline to use for this NIC. 860 QDisc QueueingDiscipline 861 862 // GROTimeout specifies the GRO timeout. Zero bypasses GRO. 863 GROTimeout time.Duration 864 865 // DeliverLinkPackets specifies whether the NIC is responsible for 866 // delivering raw packets to packet sockets. 
867 DeliverLinkPackets bool 868 } 869 870 // CreateNICWithOptions creates a NIC with the provided id, LinkEndpoint, and 871 // NICOptions. See the documentation on type NICOptions for details on how 872 // NICs can be configured. 873 // 874 // LinkEndpoint.Attach will be called to bind ep with a NetworkDispatcher. 875 func (s *Stack) CreateNICWithOptions(id tcpip.NICID, ep LinkEndpoint, opts NICOptions) tcpip.Error { 876 s.mu.Lock() 877 defer s.mu.Unlock() 878 879 // Make sure id is unique. 880 if _, ok := s.nics[id]; ok { 881 return &tcpip.ErrDuplicateNICID{} 882 } 883 884 // Make sure name is unique, unless unnamed. 885 if opts.Name != "" { 886 for _, n := range s.nics { 887 if n.Name() == opts.Name { 888 return &tcpip.ErrDuplicateNICID{} 889 } 890 } 891 } 892 893 n := newNIC(s, id, ep, opts) 894 for proto := range s.defaultForwardingEnabled { 895 if _, err := n.setForwarding(proto, true); err != nil { 896 panic(fmt.Sprintf("newNIC(%d, ...).setForwarding(%d, true): %s", id, proto, err)) 897 } 898 } 899 s.nics[id] = n 900 if !opts.Disabled { 901 return n.enable() 902 } 903 904 return nil 905 } 906 907 // CreateNIC creates a NIC with the provided id and LinkEndpoint and calls 908 // LinkEndpoint.Attach to bind ep with a NetworkDispatcher. 909 func (s *Stack) CreateNIC(id tcpip.NICID, ep LinkEndpoint) tcpip.Error { 910 return s.CreateNICWithOptions(id, ep, NICOptions{}) 911 } 912 913 // GetLinkEndpointByName gets the link endpoint specified by name. 914 func (s *Stack) GetLinkEndpointByName(name string) LinkEndpoint { 915 s.mu.RLock() 916 defer s.mu.RUnlock() 917 for _, nic := range s.nics { 918 if nic.Name() == name { 919 linkEP, ok := nic.NetworkLinkEndpoint.(LinkEndpoint) 920 if !ok { 921 panic(fmt.Sprintf("unexpected NetworkLinkEndpoint(%#v) is not a LinkEndpoint", nic.NetworkLinkEndpoint)) 922 } 923 return linkEP 924 } 925 } 926 return nil 927 } 928 929 // EnableNIC enables the given NIC so that the link-layer endpoint can start 930 // delivering packets to it. 
931 func (s *Stack) EnableNIC(id tcpip.NICID) tcpip.Error { 932 s.mu.RLock() 933 defer s.mu.RUnlock() 934 935 nic, ok := s.nics[id] 936 if !ok { 937 return &tcpip.ErrUnknownNICID{} 938 } 939 940 return nic.enable() 941 } 942 943 // DisableNIC disables the given NIC. 944 func (s *Stack) DisableNIC(id tcpip.NICID) tcpip.Error { 945 s.mu.RLock() 946 defer s.mu.RUnlock() 947 948 nic, ok := s.nics[id] 949 if !ok { 950 return &tcpip.ErrUnknownNICID{} 951 } 952 953 nic.disable() 954 return nil 955 } 956 957 // CheckNIC checks if a NIC is usable. 958 func (s *Stack) CheckNIC(id tcpip.NICID) bool { 959 s.mu.RLock() 960 defer s.mu.RUnlock() 961 962 nic, ok := s.nics[id] 963 if !ok { 964 return false 965 } 966 967 return nic.Enabled() 968 } 969 970 // RemoveNIC removes NIC and all related routes from the network stack. 971 func (s *Stack) RemoveNIC(id tcpip.NICID) tcpip.Error { 972 s.mu.Lock() 973 defer s.mu.Unlock() 974 975 return s.removeNICLocked(id) 976 } 977 978 // removeNICLocked removes NIC and all related routes from the network stack. 979 // 980 // +checklocks:s.mu 981 func (s *Stack) removeNICLocked(id tcpip.NICID) tcpip.Error { 982 nic, ok := s.nics[id] 983 if !ok { 984 return &tcpip.ErrUnknownNICID{} 985 } 986 delete(s.nics, id) 987 988 // Remove routes in-place. n tracks the number of routes written. 989 s.routeMu.Lock() 990 n := 0 991 for i, r := range s.routeTable { 992 s.routeTable[i] = tcpip.Route{} 993 if r.NIC != id { 994 // Keep this route. 995 s.routeTable[n] = r 996 n++ 997 } 998 } 999 s.routeTable = s.routeTable[:n] 1000 s.routeMu.Unlock() 1001 1002 return nic.remove() 1003 } 1004 1005 // NICInfo captures the name and addresses assigned to a NIC. 1006 type NICInfo struct { 1007 Name string 1008 LinkAddress tcpip.LinkAddress 1009 ProtocolAddresses []tcpip.ProtocolAddress 1010 1011 // Flags indicate the state of the NIC. 1012 Flags NICStateFlags 1013 1014 // MTU is the maximum transmission unit. 
1015 MTU uint32 1016 1017 Stats tcpip.NICStats 1018 1019 // NetworkStats holds the stats of each NetworkEndpoint bound to the NIC. 1020 NetworkStats map[tcpip.NetworkProtocolNumber]NetworkEndpointStats 1021 1022 // Context is user-supplied data optionally supplied in CreateNICWithOptions. 1023 // See type NICOptions for more details. 1024 Context NICContext 1025 1026 // ARPHardwareType holds the ARP Hardware type of the NIC. This is the 1027 // value sent in haType field of an ARP Request sent by this NIC and the 1028 // value expected in the haType field of an ARP response. 1029 ARPHardwareType header.ARPHardwareType 1030 1031 // Forwarding holds the forwarding status for each network endpoint that 1032 // supports forwarding. 1033 Forwarding map[tcpip.NetworkProtocolNumber]bool 1034 1035 // MulticastForwarding holds the forwarding status for each network endpoint 1036 // that supports multicast forwarding. 1037 MulticastForwarding map[tcpip.NetworkProtocolNumber]bool 1038 } 1039 1040 // HasNIC returns true if the NICID is defined in the stack. 1041 func (s *Stack) HasNIC(id tcpip.NICID) bool { 1042 s.mu.RLock() 1043 _, ok := s.nics[id] 1044 s.mu.RUnlock() 1045 return ok 1046 } 1047 1048 // NICInfo returns a map of NICIDs to their associated information. 1049 func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo { 1050 s.mu.RLock() 1051 defer s.mu.RUnlock() 1052 1053 type forwardingFn func(tcpip.NetworkProtocolNumber) (bool, tcpip.Error) 1054 forwardingValue := func(forwardingFn forwardingFn, proto tcpip.NetworkProtocolNumber, nicID tcpip.NICID, fnName string) (forward bool, ok bool) { 1055 switch forwarding, err := forwardingFn(proto); err.(type) { 1056 case nil: 1057 return forwarding, true 1058 case *tcpip.ErrUnknownProtocol: 1059 panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nicID)) 1060 case *tcpip.ErrNotSupported: 1061 // Not all network protocols support forwarding. 
1062 default: 1063 panic(fmt.Sprintf("nic(id=%d).%s(%d): %s", nicID, fnName, proto, err)) 1064 } 1065 return false, false 1066 } 1067 1068 nics := make(map[tcpip.NICID]NICInfo) 1069 for id, nic := range s.nics { 1070 flags := NICStateFlags{ 1071 Up: true, // Netstack interfaces are always up. 1072 Running: nic.Enabled(), 1073 Promiscuous: nic.Promiscuous(), 1074 Loopback: nic.IsLoopback(), 1075 } 1076 1077 netStats := make(map[tcpip.NetworkProtocolNumber]NetworkEndpointStats) 1078 for proto, netEP := range nic.networkEndpoints { 1079 netStats[proto] = netEP.Stats() 1080 } 1081 1082 info := NICInfo{ 1083 Name: nic.name, 1084 LinkAddress: nic.NetworkLinkEndpoint.LinkAddress(), 1085 ProtocolAddresses: nic.primaryAddresses(), 1086 Flags: flags, 1087 MTU: nic.NetworkLinkEndpoint.MTU(), 1088 Stats: nic.stats.local, 1089 NetworkStats: netStats, 1090 Context: nic.context, 1091 ARPHardwareType: nic.NetworkLinkEndpoint.ARPHardwareType(), 1092 Forwarding: make(map[tcpip.NetworkProtocolNumber]bool), 1093 MulticastForwarding: make(map[tcpip.NetworkProtocolNumber]bool), 1094 } 1095 1096 for proto := range s.networkProtocols { 1097 if forwarding, ok := forwardingValue(nic.forwarding, proto, id, "forwarding"); ok { 1098 info.Forwarding[proto] = forwarding 1099 } 1100 1101 if multicastForwarding, ok := forwardingValue(nic.multicastForwarding, proto, id, "multicastForwarding"); ok { 1102 info.MulticastForwarding[proto] = multicastForwarding 1103 } 1104 } 1105 1106 nics[id] = info 1107 } 1108 return nics 1109 } 1110 1111 // NICStateFlags holds information about the state of an NIC. 1112 type NICStateFlags struct { 1113 // Up indicates whether the interface is running. 1114 Up bool 1115 1116 // Running indicates whether resources are allocated. 1117 Running bool 1118 1119 // Promiscuous indicates whether the interface is in promiscuous mode. 1120 Promiscuous bool 1121 1122 // Loopback indicates whether the interface is a loopback. 
1123 Loopback bool 1124 } 1125 1126 // AddProtocolAddress adds an address to the specified NIC, possibly with extra 1127 // properties. 1128 func (s *Stack) AddProtocolAddress(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress, properties AddressProperties) tcpip.Error { 1129 s.mu.RLock() 1130 defer s.mu.RUnlock() 1131 1132 nic, ok := s.nics[id] 1133 if !ok { 1134 return &tcpip.ErrUnknownNICID{} 1135 } 1136 1137 return nic.addAddress(protocolAddress, properties) 1138 } 1139 1140 // RemoveAddress removes an existing network-layer address from the specified 1141 // NIC. 1142 func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) tcpip.Error { 1143 s.mu.RLock() 1144 defer s.mu.RUnlock() 1145 1146 if nic, ok := s.nics[id]; ok { 1147 return nic.removeAddress(addr) 1148 } 1149 1150 return &tcpip.ErrUnknownNICID{} 1151 } 1152 1153 // SetAddressLifetimes sets informational preferred and valid lifetimes, and 1154 // whether the address should be preferred or deprecated. 1155 func (s *Stack) SetAddressLifetimes(id tcpip.NICID, addr tcpip.Address, lifetimes AddressLifetimes) tcpip.Error { 1156 s.mu.RLock() 1157 defer s.mu.RUnlock() 1158 1159 if nic, ok := s.nics[id]; ok { 1160 return nic.setAddressLifetimes(addr, lifetimes) 1161 } 1162 1163 return &tcpip.ErrUnknownNICID{} 1164 } 1165 1166 // AllAddresses returns a map of NICIDs to their protocol addresses (primary 1167 // and non-primary). 1168 func (s *Stack) AllAddresses() map[tcpip.NICID][]tcpip.ProtocolAddress { 1169 s.mu.RLock() 1170 defer s.mu.RUnlock() 1171 1172 nics := make(map[tcpip.NICID][]tcpip.ProtocolAddress) 1173 for id, nic := range s.nics { 1174 nics[id] = nic.allPermanentAddresses() 1175 } 1176 return nics 1177 } 1178 1179 // GetMainNICAddress returns the first non-deprecated primary address and prefix 1180 // for the given NIC and protocol. If no non-deprecated primary addresses exist, 1181 // a deprecated address will be returned. 
If no deprecated addresses exist, the 1182 // zero value will be returned. 1183 func (s *Stack) GetMainNICAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error) { 1184 s.mu.RLock() 1185 defer s.mu.RUnlock() 1186 1187 nic, ok := s.nics[id] 1188 if !ok { 1189 return tcpip.AddressWithPrefix{}, &tcpip.ErrUnknownNICID{} 1190 } 1191 1192 return nic.PrimaryAddress(protocol) 1193 } 1194 1195 func (s *Stack) getAddressEP(nic *nic, localAddr, remoteAddr, srcHint tcpip.Address, netProto tcpip.NetworkProtocolNumber) AssignableAddressEndpoint { 1196 if localAddr.BitLen() == 0 { 1197 return nic.primaryEndpoint(netProto, remoteAddr, srcHint) 1198 } 1199 return nic.findEndpoint(netProto, localAddr, CanBePrimaryEndpoint) 1200 } 1201 1202 // NewRouteForMulticast returns a Route that may be used to forward multicast 1203 // packets. 1204 // 1205 // Returns nil if validation fails. 1206 func (s *Stack) NewRouteForMulticast(nicID tcpip.NICID, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { 1207 s.mu.RLock() 1208 defer s.mu.RUnlock() 1209 1210 nic, ok := s.nics[nicID] 1211 if !ok || !nic.Enabled() { 1212 return nil 1213 } 1214 1215 if addressEndpoint := s.getAddressEP(nic, tcpip.Address{} /* localAddr */, remoteAddr, tcpip.Address{} /* srcHint */, netProto); addressEndpoint != nil { 1216 return constructAndValidateRoute(netProto, addressEndpoint, nic, nic, tcpip.Address{} /* gateway */, tcpip.Address{} /* localAddr */, remoteAddr, s.handleLocal, false /* multicastLoop */) 1217 } 1218 return nil 1219 } 1220 1221 // findLocalRouteFromNICRLocked is like findLocalRouteRLocked but finds a route 1222 // from the specified NIC. 
1223 // 1224 // +checklocksread:s.mu 1225 func (s *Stack) findLocalRouteFromNICRLocked(localAddressNIC *nic, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { 1226 localAddressEndpoint := localAddressNIC.getAddressOrCreateTempInner(netProto, localAddr, false /* createTemp */, NeverPrimaryEndpoint) 1227 if localAddressEndpoint == nil { 1228 return nil 1229 } 1230 1231 var outgoingNIC *nic 1232 // Prefer a local route to the same interface as the local address. 1233 if localAddressNIC.hasAddress(netProto, remoteAddr) { 1234 outgoingNIC = localAddressNIC 1235 } 1236 1237 // If the remote address isn't owned by the local address's NIC, check all 1238 // NICs. 1239 if outgoingNIC == nil { 1240 for _, nic := range s.nics { 1241 if nic.hasAddress(netProto, remoteAddr) { 1242 outgoingNIC = nic 1243 break 1244 } 1245 } 1246 } 1247 1248 // If the remote address is not owned by the stack, we can't return a local 1249 // route. 1250 if outgoingNIC == nil { 1251 localAddressEndpoint.DecRef() 1252 return nil 1253 } 1254 1255 r := makeLocalRoute( 1256 netProto, 1257 localAddr, 1258 remoteAddr, 1259 outgoingNIC, 1260 localAddressNIC, 1261 localAddressEndpoint, 1262 ) 1263 1264 if r.IsOutboundBroadcast() { 1265 r.Release() 1266 return nil 1267 } 1268 1269 return r 1270 } 1271 1272 // findLocalRouteRLocked returns a local route. 1273 // 1274 // A local route is a route to some remote address which the stack owns. That 1275 // is, a local route is a route where packets never have to leave the stack. 
1276 // 1277 // +checklocksread:s.mu 1278 func (s *Stack) findLocalRouteRLocked(localAddressNICID tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { 1279 if localAddr.BitLen() == 0 { 1280 localAddr = remoteAddr 1281 } 1282 1283 if localAddressNICID == 0 { 1284 for _, localAddressNIC := range s.nics { 1285 if r := s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto); r != nil { 1286 return r 1287 } 1288 } 1289 1290 return nil 1291 } 1292 1293 if localAddressNIC, ok := s.nics[localAddressNICID]; ok { 1294 return s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto) 1295 } 1296 1297 return nil 1298 } 1299 1300 // HandleLocal returns true if non-loopback interfaces are allowed to loop packets. 1301 func (s *Stack) HandleLocal() bool { 1302 return s.handleLocal 1303 } 1304 1305 func isNICForwarding(nic *nic, proto tcpip.NetworkProtocolNumber) bool { 1306 switch forwarding, err := nic.forwarding(proto); err.(type) { 1307 case nil: 1308 return forwarding 1309 case *tcpip.ErrUnknownProtocol: 1310 panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nic.ID())) 1311 case *tcpip.ErrNotSupported: 1312 // Not all network protocols support forwarding. 1313 return false 1314 default: 1315 panic(fmt.Sprintf("nic(id=%d).forwarding(%d): %s", nic.ID(), proto, err)) 1316 } 1317 } 1318 1319 // findRouteWithLocalAddrFromAnyInterfaceRLocked returns a route to the given 1320 // destination address, leaving through the given NIC. 1321 // 1322 // Rather than preferring to find a route that uses a local address assigned to 1323 // the outgoing interface, it finds any NIC that holds a matching local address 1324 // endpoint. 
//
// Every NIC in the stack is tried in map-iteration order; the first one that
// yields both an address endpoint and a route that passes validation wins.
// Returns nil if no NIC does.
//
// +checklocksread:s.mu
func (s *Stack) findRouteWithLocalAddrFromAnyInterfaceRLocked(outgoingNIC *nic, localAddr, remoteAddr, srcHint, gateway tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) *Route {
	for _, aNIC := range s.nics {
		addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, srcHint, netProto)
		if addressEndpoint == nil {
			continue
		}

		if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, outgoingNIC, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
			return r
		}
	}
	return nil
}

// FindRoute creates a route to the given destination address, leaving through
// the given NIC and local address (if provided).
//
// If a NIC is not specified, the returned route will leave through the same
// NIC as the NIC that has the local address assigned when forwarding is
// disabled. If forwarding is enabled and the NIC is unspecified, the route may
// leave through any interface unless the route is link-local.
//
// If no local address is provided, the stack will select a local address. If no
// remote address is provided, the stack will use a remote address equal to the
// local address.
//
// Errors returned by this function: ErrUnknownProtocol when netProto is not
// enabled in the stack; ErrBadLocalAddress when no route is found for a
// loopback destination; ErrNetworkUnreachable or ErrHostUnreachable when no
// route is found otherwise (see the individual return sites below).
func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) (*Route, tcpip.Error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// Reject attempts to use unsupported protocols.
	if !s.CheckNetworkProtocol(netProto) {
		return nil, &tcpip.ErrUnknownProtocol{}
	}

	// Classify the destination. Broadcast, multicast, link-local and loopback
	// destinations do not need a route table entry (and hence no gateway).
	isLinkLocal := header.IsV6LinkLocalUnicastAddress(remoteAddr) || header.IsV6LinkLocalMulticastAddress(remoteAddr)
	isLocalBroadcast := remoteAddr == header.IPv4Broadcast
	isMulticast := header.IsV4MulticastAddress(remoteAddr) || header.IsV6MulticastAddress(remoteAddr)
	isLoopback := header.IsV4LoopbackAddress(remoteAddr) || header.IsV6LoopbackAddress(remoteAddr)
	needRoute := !(isLocalBroadcast || isMulticast || isLinkLocal || isLoopback)

	// When local handling is enabled, first try a route that never leaves the
	// stack. Multicast and broadcast destinations are excluded from this.
	if s.handleLocal && !isMulticast && !isLocalBroadcast {
		if r := s.findLocalRouteRLocked(id, localAddr, remoteAddr, netProto); r != nil {
			return r, nil
		}
	}

	// If the interface is specified and we do not need a route, return a route
	// through the interface if the interface is valid and enabled.
	if id != 0 && !needRoute {
		if nic, ok := s.nics[id]; ok && nic.Enabled() {
			if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, tcpip.Address{} /* srcHint */, netProto); addressEndpoint != nil {
				return makeRoute(
					netProto,
					tcpip.Address{}, /* gateway */
					localAddr,
					remoteAddr,
					nic, /* outboundNIC */
					nic, /* localAddressNIC */
					addressEndpoint,
					s.handleLocal,
					multicastLoop,
				), nil
			}
		}

		if isLoopback {
			return nil, &tcpip.ErrBadLocalAddress{}
		}
		return nil, &tcpip.ErrNetworkUnreachable{}
	}

	// Forwarding candidates are only recorded when neither the local nor the
	// remote address is link-local.
	onlyGlobalAddresses := !header.IsV6LinkLocalUnicastAddress(localAddr) && !isLinkLocal

	// Find a route to the remote with the route table. The closure scopes the
	// route table read lock to the scan; it returns a finished route, or nil
	// after possibly recording a forwarding candidate in chosenRoute.
	var chosenRoute tcpip.Route
	if r := func() *Route {
		s.routeMu.RLock()
		defer s.routeMu.RUnlock()

		for _, route := range s.routeTable {
			// An empty remote address matches every route table entry.
			if remoteAddr.BitLen() != 0 && !route.Destination.Contains(remoteAddr) {
				continue
			}

			// Skip entries whose NIC no longer exists or is disabled.
			nic, ok := s.nics[route.NIC]
			if !ok || !nic.Enabled() {
				continue
			}

			// Non-forwarding case: the route's NIC is acceptable to the caller
			// and also provides the local address endpoint.
			if id == 0 || id == route.NIC {
				if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, route.SourceHint, netProto); addressEndpoint != nil {
					var gateway tcpip.Address
					if needRoute {
						gateway = route.Gateway
					}
					r := constructAndValidateRoute(netProto, addressEndpoint, nic /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop)
					if r == nil {
						panic(fmt.Sprintf("non-forwarding route validation failed with route table entry = %#v, id = %d, localAddr = %s, remoteAddr = %s", route, id, localAddr, remoteAddr))
					}
					return r
				}
			}

			// If the stack has forwarding enabled, we haven't found a valid route to
			// the remote address yet, and we are routing locally generated traffic,
			// keep track of the first valid route. We keep iterating because we
			// prefer routes that let us use a local address that is assigned to the
			// outgoing interface. There is no requirement to do this from any RFC
			// but simply a choice made to better follow a strong host model which
			// the netstack follows at the time of writing.
			//
			// Note that for incoming traffic that we are forwarding (for which the
			// NIC and local address are unspecified), we do not keep iterating, as
			// there is no reason to prefer routes that let us use a local address
			// when routing forwarded (as opposed to locally-generated) traffic.
			locallyGenerated := (id != 0 || localAddr != tcpip.Address{})
			if onlyGlobalAddresses && chosenRoute.Equal(tcpip.Route{}) && isNICForwarding(nic, netProto) {
				if locallyGenerated {
					chosenRoute = route
					continue
				}
				if r := s.findRouteWithLocalAddrFromAnyInterfaceRLocked(nic, localAddr, remoteAddr, route.SourceHint, route.Gateway, netProto, multicastLoop); r != nil {
					return r
				}
			}
		}

		return nil
	}(); r != nil {
		return r, nil
	}

	// A zero-valued chosenRoute means no forwarding candidate was recorded.
	if !chosenRoute.Equal(tcpip.Route{}) {
		// At this point we know the stack has forwarding enabled since chosenRoute is
		// only set when forwarding is enabled.
		nic, ok := s.nics[chosenRoute.NIC]
		if !ok {
			// If the route's NIC was invalid, we should not have chosen the route.
			panic(fmt.Sprintf("chosen route must have a valid NIC with ID = %d", chosenRoute.NIC))
		}

		var gateway tcpip.Address
		if needRoute {
			gateway = chosenRoute.Gateway
		}

		// Use the specified NIC to get the local address endpoint.
		if id != 0 {
			if aNIC, ok := s.nics[id]; ok {
				if addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, chosenRoute.SourceHint, netProto); addressEndpoint != nil {
					if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
						return r, nil
					}
				}
			}

			// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
			return nil, &tcpip.ErrHostUnreachable{}
		}

		// NOTE(review): this condition is always true here because the id != 0
		// branch above returns unconditionally.
		if id == 0 {
			// If an interface is not specified, try to find a NIC that holds the local
			// address endpoint to construct a route.
			if r := s.findRouteWithLocalAddrFromAnyInterfaceRLocked(nic, localAddr, remoteAddr, chosenRoute.SourceHint, gateway, netProto, multicastLoop); r != nil {
				return r, nil
			}
		}
	}

	if needRoute {
		// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
		return nil, &tcpip.ErrHostUnreachable{}
	}
	// Only an IPv6 loopback destination is reported as a bad local address
	// here; IPv4 loopback falls through to ErrNetworkUnreachable.
	if header.IsV6LoopbackAddress(remoteAddr) {
		return nil, &tcpip.ErrBadLocalAddress{}
	}
	// No route was found for a destination that does not need a route table
	// entry.
	return nil, &tcpip.ErrNetworkUnreachable{}
}

// CheckNetworkProtocol checks if a given network protocol is enabled in the
// stack.
//
// The lookup reads s.networkProtocols without taking any lock.
func (s *Stack) CheckNetworkProtocol(protocol tcpip.NetworkProtocolNumber) bool {
	_, ok := s.networkProtocols[protocol]
	return ok
}

// CheckDuplicateAddress performs duplicate address detection for the address on
// the specified interface.
//
// Returns ErrUnknownNICID if nicID does not name a NIC in the stack. The stack
// lock is released before the NIC is asked to perform the check.
func (s *Stack) CheckDuplicateAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, h DADCompletionHandler) (DADCheckAddressDisposition, tcpip.Error) {
	s.mu.RLock()
	nic, ok := s.nics[nicID]
	s.mu.RUnlock()

	if !ok {
		return 0, &tcpip.ErrUnknownNICID{}
	}

	return nic.checkDuplicateAddress(protocol, addr, h)
}

// CheckLocalAddress determines if the given local address exists, and if it
// does, returns the id of the NIC it's bound to. Returns 0 if the address
// does not exist.
//
// When nicID is non-zero only that NIC is consulted, and for IPv4 the NIC's id
// is returned without checking addr at all.
func (s *Stack) CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// If a NIC is specified, use its NIC id.
	if nicID != 0 {
		nic, ok := s.nics[nicID]
		if !ok {
			return 0
		}
		// In IPv4, linux only checks the interface. If it matches, then it does
		// not bother with the address.
		// https://github.com/torvalds/linux/blob/15205c2829ca2cbb5ece5ceaafe1171a8470e62b/net/ipv4/igmp.c#L1829-L1837
		if protocol == header.IPv4ProtocolNumber {
			return nic.id
		}
		if nic.CheckLocalAddress(protocol, addr) {
			return nic.id
		}
		return 0
	}

	// Go through all the NICs.
	for _, nic := range s.nics {
		if nic.CheckLocalAddress(protocol, addr) {
			return nic.id
		}
	}

	return 0
}

// SetPromiscuousMode enables or disables promiscuous mode in the given NIC.
//
// Returns ErrUnknownNICID if nicID does not name a NIC in the stack.
func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) tcpip.Error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	nic, ok := s.nics[nicID]
	if !ok {
		return &tcpip.ErrUnknownNICID{}
	}

	nic.setPromiscuousMode(enable)

	return nil
}

// SetSpoofing enables or disables address spoofing in the given NIC, allowing
// endpoints to bind to any address in the NIC.
//
// Returns ErrUnknownNICID if nicID does not name a NIC in the stack.
func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) tcpip.Error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	nic, ok := s.nics[nicID]
	if !ok {
		return &tcpip.ErrUnknownNICID{}
	}

	nic.setSpoofing(enable)

	return nil
}

// LinkResolutionResult is the result of a link address resolution attempt.
type LinkResolutionResult struct {
	LinkAddress tcpip.LinkAddress
	Err         tcpip.Error
}

// GetLinkAddress finds the link address corresponding to a network address.
//
// Returns ErrNotSupported if the stack is not configured with a link address
// resolver for the specified network protocol.
//
// Returns ErrWouldBlock if the link address is not readily available. Triggers
// address resolution asynchronously.
1608 // 1609 // onResolve will be called either immediately, if resolution is not required, 1610 // or when address resolution is complete, with the resolved link address and 1611 // whether resolution succeeded. 1612 // 1613 // If specified, the local address must be an address local to the interface 1614 // the neighbor cache belongs to. The local address is the source address of 1615 // a packet prompting NUD/link address resolution. 1616 func (s *Stack) GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, onResolve func(LinkResolutionResult)) tcpip.Error { 1617 s.mu.RLock() 1618 nic, ok := s.nics[nicID] 1619 s.mu.RUnlock() 1620 if !ok { 1621 return &tcpip.ErrUnknownNICID{} 1622 } 1623 1624 return nic.getLinkAddress(addr, localAddr, protocol, onResolve) 1625 } 1626 1627 // Neighbors returns all IP to MAC address associations. 1628 func (s *Stack) Neighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) ([]NeighborEntry, tcpip.Error) { 1629 s.mu.RLock() 1630 nic, ok := s.nics[nicID] 1631 s.mu.RUnlock() 1632 1633 if !ok { 1634 return nil, &tcpip.ErrUnknownNICID{} 1635 } 1636 1637 return nic.neighbors(protocol) 1638 } 1639 1640 // AddStaticNeighbor statically associates an IP address to a MAC address. 1641 func (s *Stack) AddStaticNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress) tcpip.Error { 1642 s.mu.RLock() 1643 nic, ok := s.nics[nicID] 1644 s.mu.RUnlock() 1645 1646 if !ok { 1647 return &tcpip.ErrUnknownNICID{} 1648 } 1649 1650 return nic.addStaticNeighbor(addr, protocol, linkAddr) 1651 } 1652 1653 // RemoveNeighbor removes an IP to MAC address association previously created 1654 // either automatically or by AddStaticNeighbor. Returns ErrBadAddress if there 1655 // is no association with the provided address. 
1656 func (s *Stack) RemoveNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error { 1657 s.mu.RLock() 1658 nic, ok := s.nics[nicID] 1659 s.mu.RUnlock() 1660 1661 if !ok { 1662 return &tcpip.ErrUnknownNICID{} 1663 } 1664 1665 return nic.removeNeighbor(protocol, addr) 1666 } 1667 1668 // ClearNeighbors removes all IP to MAC address associations. 1669 func (s *Stack) ClearNeighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) tcpip.Error { 1670 s.mu.RLock() 1671 nic, ok := s.nics[nicID] 1672 s.mu.RUnlock() 1673 1674 if !ok { 1675 return &tcpip.ErrUnknownNICID{} 1676 } 1677 1678 return nic.clearNeighbors(protocol) 1679 } 1680 1681 // RegisterTransportEndpoint registers the given endpoint with the stack 1682 // transport dispatcher. Received packets that match the provided id will be 1683 // delivered to the given endpoint; specifying a nic is optional, but 1684 // nic-specific IDs have precedence over global ones. 1685 func (s *Stack) RegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { 1686 return s.demux.registerEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) 1687 } 1688 1689 // CheckRegisterTransportEndpoint checks if an endpoint can be registered with 1690 // the stack transport dispatcher. 1691 func (s *Stack) CheckRegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { 1692 return s.demux.checkEndpoint(netProtos, protocol, id, flags, bindToDevice) 1693 } 1694 1695 // UnregisterTransportEndpoint removes the endpoint with the given id from the 1696 // stack transport dispatcher. 
1697 func (s *Stack) UnregisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) { 1698 s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) 1699 } 1700 1701 // StartTransportEndpointCleanup removes the endpoint with the given id from 1702 // the stack transport dispatcher. It also transitions it to the cleanup stage. 1703 func (s *Stack) StartTransportEndpointCleanup(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) { 1704 s.cleanupEndpointsMu.Lock() 1705 s.cleanupEndpoints[ep] = struct{}{} 1706 s.cleanupEndpointsMu.Unlock() 1707 1708 s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) 1709 } 1710 1711 // CompleteTransportEndpointCleanup removes the endpoint from the cleanup 1712 // stage. 1713 func (s *Stack) CompleteTransportEndpointCleanup(ep TransportEndpoint) { 1714 s.cleanupEndpointsMu.Lock() 1715 delete(s.cleanupEndpoints, ep) 1716 s.cleanupEndpointsMu.Unlock() 1717 } 1718 1719 // FindTransportEndpoint finds an endpoint that most closely matches the provided 1720 // id. If no endpoint is found it returns nil. 1721 func (s *Stack) FindTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, nicID tcpip.NICID) TransportEndpoint { 1722 return s.demux.findTransportEndpoint(netProto, transProto, id, nicID) 1723 } 1724 1725 // RegisterRawTransportEndpoint registers the given endpoint with the stack 1726 // transport dispatcher. Received packets that match the provided transport 1727 // protocol will be delivered to the given endpoint. 
1728 func (s *Stack) RegisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) tcpip.Error { 1729 return s.demux.registerRawEndpoint(netProto, transProto, ep) 1730 } 1731 1732 // UnregisterRawTransportEndpoint removes the endpoint for the transport 1733 // protocol from the stack transport dispatcher. 1734 func (s *Stack) UnregisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) { 1735 s.demux.unregisterRawEndpoint(netProto, transProto, ep) 1736 } 1737 1738 // RegisterRestoredEndpoint records e as an endpoint that has been restored on 1739 // this stack. 1740 func (s *Stack) RegisterRestoredEndpoint(e RestoredEndpoint) { 1741 s.mu.Lock() 1742 defer s.mu.Unlock() 1743 1744 s.restoredEndpoints = append(s.restoredEndpoints, e) 1745 } 1746 1747 // RegisterResumableEndpoint records e as an endpoint that has to be resumed. 1748 func (s *Stack) RegisterResumableEndpoint(e ResumableEndpoint) { 1749 s.mu.Lock() 1750 defer s.mu.Unlock() 1751 1752 s.resumableEndpoints = append(s.resumableEndpoints, e) 1753 } 1754 1755 // RegisteredEndpoints returns all endpoints which are currently registered. 1756 func (s *Stack) RegisteredEndpoints() []TransportEndpoint { 1757 s.mu.Lock() 1758 defer s.mu.Unlock() 1759 1760 var es []TransportEndpoint 1761 for _, e := range s.demux.protocol { 1762 es = append(es, e.transportEndpoints()...) 1763 } 1764 return es 1765 } 1766 1767 // CleanupEndpoints returns endpoints currently in the cleanup state. 1768 func (s *Stack) CleanupEndpoints() []TransportEndpoint { 1769 s.cleanupEndpointsMu.Lock() 1770 defer s.cleanupEndpointsMu.Unlock() 1771 1772 es := make([]TransportEndpoint, 0, len(s.cleanupEndpoints)) 1773 for e := range s.cleanupEndpoints { 1774 es = append(es, e) 1775 } 1776 return es 1777 } 1778 1779 // RestoreCleanupEndpoints adds endpoints to cleanup tracking. 
This is useful 1780 // for restoring a stack after a save. 1781 func (s *Stack) RestoreCleanupEndpoints(es []TransportEndpoint) { 1782 s.cleanupEndpointsMu.Lock() 1783 defer s.cleanupEndpointsMu.Unlock() 1784 1785 for _, e := range es { 1786 s.cleanupEndpoints[e] = struct{}{} 1787 } 1788 } 1789 1790 // Close closes all currently registered transport endpoints. 1791 // 1792 // Endpoints created or modified during this call may not get closed. 1793 func (s *Stack) Close() { 1794 for _, e := range s.RegisteredEndpoints() { 1795 e.Abort() 1796 } 1797 for _, p := range s.transportProtocols { 1798 p.proto.Close() 1799 } 1800 for _, p := range s.networkProtocols { 1801 p.Close() 1802 } 1803 } 1804 1805 // Wait waits for all transport and link endpoints to halt their worker 1806 // goroutines. 1807 // 1808 // Endpoints created or modified during this call may not get waited on. 1809 // 1810 // Note that link endpoints must be stopped via an implementation specific 1811 // mechanism. 1812 func (s *Stack) Wait() { 1813 for _, e := range s.RegisteredEndpoints() { 1814 e.Wait() 1815 } 1816 for _, e := range s.CleanupEndpoints() { 1817 e.Wait() 1818 } 1819 for _, p := range s.transportProtocols { 1820 p.proto.Wait() 1821 } 1822 for _, p := range s.networkProtocols { 1823 p.Wait() 1824 } 1825 1826 s.mu.Lock() 1827 defer s.mu.Unlock() 1828 1829 for id, n := range s.nics { 1830 // Remove NIC to ensure that qDisc goroutines are correctly 1831 // terminated on stack teardown. 1832 s.removeNICLocked(id) 1833 n.NetworkLinkEndpoint.Wait() 1834 } 1835 } 1836 1837 // Destroy destroys the stack with all endpoints. 1838 func (s *Stack) Destroy() { 1839 s.Close() 1840 s.Wait() 1841 } 1842 1843 // Pause pauses any protocol level background workers. 1844 func (s *Stack) Pause() { 1845 for _, p := range s.transportProtocols { 1846 p.proto.Pause() 1847 } 1848 } 1849 1850 // Restore restarts the stack after a restore. This must be called after the 1851 // entire system has been restored. 
1852 func (s *Stack) Restore() { 1853 // RestoredEndpoint.Restore() may call other methods on s, so we can't hold 1854 // s.mu while restoring the endpoints. 1855 s.mu.Lock() 1856 eps := s.restoredEndpoints 1857 s.restoredEndpoints = nil 1858 s.mu.Unlock() 1859 for _, e := range eps { 1860 e.Restore(s) 1861 } 1862 // Now resume any protocol level background workers. 1863 for _, p := range s.transportProtocols { 1864 p.proto.Resume() 1865 } 1866 } 1867 1868 // Resume resumes the stack after a save. 1869 func (s *Stack) Resume() { 1870 s.mu.Lock() 1871 eps := s.resumableEndpoints 1872 s.resumableEndpoints = nil 1873 s.mu.Unlock() 1874 for _, e := range eps { 1875 e.Resume() 1876 } 1877 // Now resume any protocol level background workers. 1878 for _, p := range s.transportProtocols { 1879 p.proto.Resume() 1880 } 1881 } 1882 1883 // RegisterPacketEndpoint registers ep with the stack, causing it to receive 1884 // all traffic of the specified netProto on the given NIC. If nicID is 0, it 1885 // receives traffic from every NIC. 1886 func (s *Stack) RegisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) tcpip.Error { 1887 s.mu.Lock() 1888 defer s.mu.Unlock() 1889 1890 // If no NIC is specified, capture on all devices. 1891 if nicID == 0 { 1892 // Register with each NIC. 1893 for _, nic := range s.nics { 1894 nic.registerPacketEndpoint(netProto, ep) 1895 } 1896 return nil 1897 } 1898 1899 // Capture on a specific device. 1900 nic, ok := s.nics[nicID] 1901 if !ok { 1902 return &tcpip.ErrUnknownNICID{} 1903 } 1904 nic.registerPacketEndpoint(netProto, ep) 1905 1906 return nil 1907 } 1908 1909 // UnregisterPacketEndpoint unregisters ep for packets of the specified 1910 // netProto from the specified NIC. If nicID is 0, ep is unregistered from all 1911 // NICs. 
1912 func (s *Stack) UnregisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) { 1913 s.mu.Lock() 1914 defer s.mu.Unlock() 1915 s.unregisterPacketEndpointLocked(nicID, netProto, ep) 1916 } 1917 1918 // +checklocks:s.mu 1919 func (s *Stack) unregisterPacketEndpointLocked(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) { 1920 // If no NIC is specified, unregister on all devices. 1921 if nicID == 0 { 1922 // Unregister with each NIC. 1923 for _, nic := range s.nics { 1924 nic.unregisterPacketEndpoint(netProto, ep) 1925 } 1926 return 1927 } 1928 1929 // Unregister in a single device. 1930 nic, ok := s.nics[nicID] 1931 if !ok { 1932 return 1933 } 1934 nic.unregisterPacketEndpoint(netProto, ep) 1935 } 1936 1937 // WritePacketToRemote writes a payload on the specified NIC using the provided 1938 // network protocol and remote link address. 1939 func (s *Stack) WritePacketToRemote(nicID tcpip.NICID, remote tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, payload buffer.Buffer) tcpip.Error { 1940 s.mu.Lock() 1941 nic, ok := s.nics[nicID] 1942 s.mu.Unlock() 1943 if !ok { 1944 return &tcpip.ErrUnknownDevice{} 1945 } 1946 pkt := NewPacketBuffer(PacketBufferOptions{ 1947 ReserveHeaderBytes: int(nic.MaxHeaderLength()), 1948 Payload: payload, 1949 }) 1950 defer pkt.DecRef() 1951 pkt.NetworkProtocolNumber = netProto 1952 return nic.WritePacketToRemote(remote, pkt) 1953 } 1954 1955 // WriteRawPacket writes data directly to the specified NIC without adding any 1956 // headers. 
1957 func (s *Stack) WriteRawPacket(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber, payload buffer.Buffer) tcpip.Error { 1958 s.mu.RLock() 1959 nic, ok := s.nics[nicID] 1960 s.mu.RUnlock() 1961 if !ok { 1962 return &tcpip.ErrUnknownNICID{} 1963 } 1964 1965 pkt := NewPacketBuffer(PacketBufferOptions{ 1966 Payload: payload, 1967 }) 1968 defer pkt.DecRef() 1969 pkt.NetworkProtocolNumber = proto 1970 return nic.writeRawPacketWithLinkHeaderInPayload(pkt) 1971 } 1972 1973 // NetworkProtocolInstance returns the protocol instance in the stack for the 1974 // specified network protocol. This method is public for protocol implementers 1975 // and tests to use. 1976 func (s *Stack) NetworkProtocolInstance(num tcpip.NetworkProtocolNumber) NetworkProtocol { 1977 if p, ok := s.networkProtocols[num]; ok { 1978 return p 1979 } 1980 return nil 1981 } 1982 1983 // TransportProtocolInstance returns the protocol instance in the stack for the 1984 // specified transport protocol. This method is public for protocol implementers 1985 // and tests to use. 1986 func (s *Stack) TransportProtocolInstance(num tcpip.TransportProtocolNumber) TransportProtocol { 1987 if pState, ok := s.transportProtocols[num]; ok { 1988 return pState.proto 1989 } 1990 return nil 1991 } 1992 1993 // AddTCPProbe installs a probe function that will be invoked on every segment 1994 // received by a given TCP endpoint. The probe function is passed a copy of the 1995 // TCP endpoint state before and after processing of the segment. 1996 // 1997 // NOTE: TCPProbe is added only to endpoints created after this call. Endpoints 1998 // created prior to this call will not call the probe function. 1999 // 2000 // Further, installing two different probes back to back can result in some 2001 // endpoints calling the first one and some the second one. There is no 2002 // guarantee provided on which probe will be invoked. Ideally this should only 2003 // be called once per stack. 
2004 func (s *Stack) AddTCPProbe(probe TCPProbeFunc) { 2005 s.tcpProbeFunc.Store(probe) 2006 } 2007 2008 // GetTCPProbe returns the TCPProbeFunc if installed with AddTCPProbe, nil 2009 // otherwise. 2010 func (s *Stack) GetTCPProbe() TCPProbeFunc { 2011 p := s.tcpProbeFunc.Load() 2012 if p == nil { 2013 return nil 2014 } 2015 return p.(TCPProbeFunc) 2016 } 2017 2018 // RemoveTCPProbe removes an installed TCP probe. 2019 // 2020 // NOTE: This only ensures that endpoints created after this call do not 2021 // have a probe attached. Endpoints already created will continue to invoke 2022 // TCP probe. 2023 func (s *Stack) RemoveTCPProbe() { 2024 // This must be TCPProbeFunc(nil) because atomic.Value.Store(nil) panics. 2025 s.tcpProbeFunc.Store(TCPProbeFunc(nil)) 2026 } 2027 2028 // JoinGroup joins the given multicast group on the given NIC. 2029 func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error { 2030 s.mu.RLock() 2031 defer s.mu.RUnlock() 2032 2033 if nic, ok := s.nics[nicID]; ok { 2034 return nic.joinGroup(protocol, multicastAddr) 2035 } 2036 return &tcpip.ErrUnknownNICID{} 2037 } 2038 2039 // LeaveGroup leaves the given multicast group on the given NIC. 2040 func (s *Stack) LeaveGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error { 2041 s.mu.RLock() 2042 defer s.mu.RUnlock() 2043 2044 if nic, ok := s.nics[nicID]; ok { 2045 return nic.leaveGroup(protocol, multicastAddr) 2046 } 2047 return &tcpip.ErrUnknownNICID{} 2048 } 2049 2050 // IsInGroup returns true if the NIC with ID nicID has joined the multicast 2051 // group multicastAddr. 
2052 func (s *Stack) IsInGroup(nicID tcpip.NICID, multicastAddr tcpip.Address) (bool, tcpip.Error) { 2053 s.mu.RLock() 2054 defer s.mu.RUnlock() 2055 2056 if nic, ok := s.nics[nicID]; ok { 2057 return nic.isInGroup(multicastAddr), nil 2058 } 2059 return false, &tcpip.ErrUnknownNICID{} 2060 } 2061 2062 // IPTables returns the stack's iptables. 2063 func (s *Stack) IPTables() *IPTables { 2064 return s.tables 2065 } 2066 2067 // ICMPLimit returns the maximum number of ICMP messages that can be sent 2068 // in one second. 2069 func (s *Stack) ICMPLimit() rate.Limit { 2070 return s.icmpRateLimiter.Limit() 2071 } 2072 2073 // SetICMPLimit sets the maximum number of ICMP messages that be sent 2074 // in one second. 2075 func (s *Stack) SetICMPLimit(newLimit rate.Limit) { 2076 s.icmpRateLimiter.SetLimit(newLimit) 2077 } 2078 2079 // ICMPBurst returns the maximum number of ICMP messages that can be sent 2080 // in a single burst. 2081 func (s *Stack) ICMPBurst() int { 2082 return s.icmpRateLimiter.Burst() 2083 } 2084 2085 // SetICMPBurst sets the maximum number of ICMP messages that can be sent 2086 // in a single burst. 2087 func (s *Stack) SetICMPBurst(burst int) { 2088 s.icmpRateLimiter.SetBurst(burst) 2089 } 2090 2091 // AllowICMPMessage returns true if we the rate limiter allows at least one 2092 // ICMP message to be sent at this instant. 2093 func (s *Stack) AllowICMPMessage() bool { 2094 return s.icmpRateLimiter.Allow() 2095 } 2096 2097 // GetNetworkEndpoint returns the NetworkEndpoint with the specified protocol 2098 // number installed on the specified NIC. 2099 func (s *Stack) GetNetworkEndpoint(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NetworkEndpoint, tcpip.Error) { 2100 s.mu.Lock() 2101 defer s.mu.Unlock() 2102 2103 nic, ok := s.nics[nicID] 2104 if !ok { 2105 return nil, &tcpip.ErrUnknownNICID{} 2106 } 2107 2108 return nic.getNetworkEndpoint(proto), nil 2109 } 2110 2111 // NUDConfigurations gets the per-interface NUD configurations. 
2112 func (s *Stack) NUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NUDConfigurations, tcpip.Error) { 2113 s.mu.RLock() 2114 nic, ok := s.nics[id] 2115 s.mu.RUnlock() 2116 2117 if !ok { 2118 return NUDConfigurations{}, &tcpip.ErrUnknownNICID{} 2119 } 2120 2121 return nic.nudConfigs(proto) 2122 } 2123 2124 // SetNUDConfigurations sets the per-interface NUD configurations. 2125 // 2126 // Note, if c contains invalid NUD configuration values, it will be fixed to 2127 // use default values for the erroneous values. 2128 func (s *Stack) SetNUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber, c NUDConfigurations) tcpip.Error { 2129 s.mu.RLock() 2130 nic, ok := s.nics[id] 2131 s.mu.RUnlock() 2132 2133 if !ok { 2134 return &tcpip.ErrUnknownNICID{} 2135 } 2136 2137 return nic.setNUDConfigs(proto, c) 2138 } 2139 2140 // Seed returns a 32 bit value that can be used as a seed value. 2141 // 2142 // NOTE: The seed is generated once during stack initialization only. 2143 func (s *Stack) Seed() uint32 { 2144 return s.seed 2145 } 2146 2147 // InsecureRNG returns a reference to a pseudo random generator that can be used 2148 // to generate random numbers as required. It is not cryptographically secure 2149 // and should not be used for security sensitive work. 2150 func (s *Stack) InsecureRNG() *rand.Rand { 2151 return s.insecureRNG 2152 } 2153 2154 // SecureRNG returns the stack's cryptographically secure random number 2155 // generator. 2156 func (s *Stack) SecureRNG() cryptorand.RNG { 2157 return s.secureRNG 2158 } 2159 2160 // FindNICNameFromID returns the name of the NIC for the given NICID. 2161 func (s *Stack) FindNICNameFromID(id tcpip.NICID) string { 2162 s.mu.RLock() 2163 defer s.mu.RUnlock() 2164 2165 nic, ok := s.nics[id] 2166 if !ok { 2167 return "" 2168 } 2169 2170 return nic.Name() 2171 } 2172 2173 // ParseResult indicates the result of a parsing attempt. 
2174 type ParseResult int 2175 2176 const ( 2177 // ParsedOK indicates that a packet was successfully parsed. 2178 ParsedOK ParseResult = iota 2179 2180 // UnknownTransportProtocol indicates that the transport protocol is unknown. 2181 UnknownTransportProtocol 2182 2183 // TransportLayerParseError indicates that the transport packet was not 2184 // successfully parsed. 2185 TransportLayerParseError 2186 ) 2187 2188 // ParsePacketBufferTransport parses the provided packet buffer's transport 2189 // header. 2190 func (s *Stack) ParsePacketBufferTransport(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) ParseResult { 2191 pkt.TransportProtocolNumber = protocol 2192 // Parse the transport header if present. 2193 state, ok := s.transportProtocols[protocol] 2194 if !ok { 2195 return UnknownTransportProtocol 2196 } 2197 2198 if !state.proto.Parse(pkt) { 2199 return TransportLayerParseError 2200 } 2201 2202 return ParsedOK 2203 } 2204 2205 // networkProtocolNumbers returns the network protocol numbers the stack is 2206 // configured with. 2207 func (s *Stack) networkProtocolNumbers() []tcpip.NetworkProtocolNumber { 2208 protos := make([]tcpip.NetworkProtocolNumber, 0, len(s.networkProtocols)) 2209 for p := range s.networkProtocols { 2210 protos = append(protos, p) 2211 } 2212 return protos 2213 } 2214 2215 func isSubnetBroadcastOnNIC(nic *nic, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool { 2216 addressEndpoint := nic.getAddressOrCreateTempInner(protocol, addr, false /* createTemp */, NeverPrimaryEndpoint) 2217 if addressEndpoint == nil { 2218 return false 2219 } 2220 2221 subnet := addressEndpoint.Subnet() 2222 addressEndpoint.DecRef() 2223 return subnet.IsBroadcast(addr) 2224 } 2225 2226 // IsSubnetBroadcast returns true if the provided address is a subnet-local 2227 // broadcast address on the specified NIC and protocol. 2228 // 2229 // Returns false if the NIC is unknown or if the protocol is unknown or does 2230 // not support addressing. 
2231 // 2232 // If the NIC is not specified, the stack will check all NICs. 2233 func (s *Stack) IsSubnetBroadcast(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool { 2234 s.mu.RLock() 2235 defer s.mu.RUnlock() 2236 2237 if nicID != 0 { 2238 nic, ok := s.nics[nicID] 2239 if !ok { 2240 return false 2241 } 2242 2243 return isSubnetBroadcastOnNIC(nic, protocol, addr) 2244 } 2245 2246 for _, nic := range s.nics { 2247 if isSubnetBroadcastOnNIC(nic, protocol, addr) { 2248 return true 2249 } 2250 } 2251 2252 return false 2253 } 2254 2255 // PacketEndpointWriteSupported returns true iff packet endpoints support write 2256 // operations. 2257 func (s *Stack) PacketEndpointWriteSupported() bool { 2258 return s.packetEndpointWriteSupported 2259 }