// Listing source: github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/tcpip/stack/stack.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package stack provides the glue between networking protocols and the
// consumers of the networking stack.
//
// For consumers, the only function of interest is New(), everything else is
// provided by the tcpip/public package.
package stack

import (
	"encoding/binary"
	"fmt"
	"io"
	"math/rand"
	"sync/atomic"
	"time"

	"github.com/MerlinKodo/gvisor/pkg/atomicbitops"
	"github.com/MerlinKodo/gvisor/pkg/buffer"
	"github.com/MerlinKodo/gvisor/pkg/log"
	cryptorand "github.com/MerlinKodo/gvisor/pkg/rand"
	"github.com/MerlinKodo/gvisor/pkg/tcpip"
	"github.com/MerlinKodo/gvisor/pkg/tcpip/header"
	"github.com/MerlinKodo/gvisor/pkg/tcpip/ports"
	"github.com/MerlinKodo/gvisor/pkg/waiter"
	"golang.org/x/time/rate"
)

const (
	// DefaultTOS is the default type of service value for network endpoints.
	DefaultTOS = 0
)

// transportProtocolState pairs a transport protocol registered with the stack
// with the optional per-stack default packet handler installed through
// SetTransportProtocolHandler.
type transportProtocolState struct {
	proto          TransportProtocol
	defaultHandler func(id TransportEndpointID, pkt PacketBufferPtr) bool
}

// ResumableEndpoint is an endpoint that needs to be resumed after restore.
type ResumableEndpoint interface {
	// Resume resumes an endpoint after restore. This can be used to restart
	// background workers such as protocol goroutines. This must be called after
	// all indirect dependencies of the endpoint have been restored, which
	// generally implies at the end of the restore process.
	Resume(*Stack)
}

// uniqueIDGenerator is a default unique ID generator.
type uniqueIDGenerator atomicbitops.Uint64

// UniqueID implements the UniqueID interface by adding 1 to the underlying
// atomic counter and returning the value reported by Add.
func (u *uniqueIDGenerator) UniqueID() uint64 {
	return ((*atomicbitops.Uint64)(u)).Add(1)
}

// netRawMissingLogger is a rate-limited logger (constructed with a one minute
// period) used to report attempts to create raw sockets on a stack that has no
// raw endpoint factory.
var netRawMissingLogger = log.BasicRateLimitedLogger(time.Minute)

// Stack is a networking stack, with all supported protocols, NICs, and route
// table.
//
// LOCK ORDERING: mu > routeMu.
type Stack struct {
	// transportProtocols and networkProtocols hold the protocols produced by
	// the factories passed to New, keyed by protocol number.
	transportProtocols map[tcpip.TransportProtocolNumber]*transportProtocolState
	networkProtocols   map[tcpip.NetworkProtocolNumber]NetworkProtocol

	// rawFactory creates raw endpoints. If nil, raw endpoints are
	// disabled. It is set during Stack creation and is immutable.
	rawFactory                   RawFactory
	packetEndpointWriteSupported bool

	// demux is the stack-wide transport demuxer created by New.
	demux *transportDemuxer

	// stats holds the stack's counters; New initializes it from
	// Options.Stats.FillIn().
	stats tcpip.Stats

	// routeMu protects annotated fields below.
	routeMu routeStackRWMutex

	// +checklocks:routeMu
	routeTable []tcpip.Route

	// mu protects annotated fields below.
	mu stackRWMutex
	// +checklocks:mu
	nics                     map[tcpip.NICID]*nic
	defaultForwardingEnabled map[tcpip.NetworkProtocolNumber]struct{}

	// cleanupEndpointsMu protects cleanupEndpoints.
	cleanupEndpointsMu cleanupEndpointsMutex
	// +checklocks:cleanupEndpointsMu
	cleanupEndpoints map[TransportEndpoint]struct{}

	// PortManager is embedded, so its methods are promoted to Stack. New sets
	// it to ports.NewPortManager().
	*ports.PortManager

	// If not nil, then any new endpoints will have this probe function
	// invoked every time they receive a TCP segment.
	tcpProbeFunc atomic.Value // TCPProbeFunc

	// clock is used to generate user-visible times.
	clock tcpip.Clock

	// handleLocal allows non-loopback interfaces to loop packets.
	handleLocal bool

	// tables are the iptables packet filtering and manipulation rules.
	// TODO(gvisor.dev/issue/4595): S/R this field.
	tables *IPTables

	// resumableEndpoints is a list of endpoints that need to be resumed if the
	// stack is being restored.
	resumableEndpoints []ResumableEndpoint

	// icmpRateLimiter is a global rate limiter for all ICMP messages generated
	// by the stack.
	icmpRateLimiter *ICMPRateLimiter

	// seed is a one-time random value initialized at stack startup.
	//
	// TODO(gvisor.dev/issue/940): S/R this field.
	seed uint32

	// nudConfigs is the default NUD configurations used by interfaces.
	nudConfigs NUDConfigurations

	// nudDisp is the NUD event dispatcher that is used to send the netstack
	// integrator NUD related events.
	nudDisp NUDDispatcher

	// uniqueIDGenerator is a generator of unique identifiers.
	uniqueIDGenerator UniqueID

	// randomGenerator is an injectable pseudo random generator that can be
	// used when a random number is required.
	randomGenerator *rand.Rand

	// secureRNG is a cryptographically secure random number generator.
	secureRNG io.Reader

	// sendBufferSize holds the min/default/max send buffer sizes for
	// endpoints other than TCP.
	sendBufferSize tcpip.SendBufferSizeOption

	// receiveBufferSize holds the min/default/max receive buffer sizes for
	// endpoints other than TCP.
	receiveBufferSize tcpip.ReceiveBufferSizeOption

	// tcpInvalidRateLimit is the maximal rate for sending duplicate
	// acknowledgements in response to incoming TCP packets that are for an existing
	// connection but that are invalid due to any of the following reasons:
	//
	//   a) out-of-window sequence number.
	//   b) out-of-window acknowledgement number.
	//   c) PAWS check failure (when implemented).
	//
	// This is required to prevent potential ACK loops.
	// Setting this to 0 will disable all rate limiting.
	tcpInvalidRateLimit time.Duration

	// tsOffsetSecret is the secret key for generating timestamp offsets
	// initialized at stack startup.
	tsOffsetSecret uint32
}

// UniqueID is an abstract generator of unique identifiers.
type UniqueID interface {
	UniqueID() uint64
}

// NetworkProtocolFactory instantiates a network protocol.
//
// NetworkProtocolFactory must not attempt to modify the stack, it may only
// query the stack.
type NetworkProtocolFactory func(*Stack) NetworkProtocol

// TransportProtocolFactory instantiates a transport protocol.
//
// TransportProtocolFactory must not attempt to modify the stack, it may only
// query the stack.
type TransportProtocolFactory func(*Stack) TransportProtocol

// Options contains optional Stack configuration.
type Options struct {
	// NetworkProtocols lists the network protocols to enable.
	NetworkProtocols []NetworkProtocolFactory

	// TransportProtocols lists the transport protocols to enable.
	TransportProtocols []TransportProtocolFactory

	// Clock is an optional clock used for timekeeping.
	//
	// If Clock is nil, tcpip.NewStdClock() will be used.
	Clock tcpip.Clock

	// Stats are optional statistic counters.
	Stats tcpip.Stats

	// HandleLocal indicates whether packets destined to their source
	// should be handled by the stack internally (true) or outside the
	// stack (false).
	HandleLocal bool

	// UniqueID is an optional generator of unique identifiers.
	UniqueID UniqueID

	// NUDConfigs is the default NUD configurations used by interfaces.
	NUDConfigs NUDConfigurations

	// NUDDisp is the NUD event dispatcher that an integrator can provide to
	// receive NUD related events.
	NUDDisp NUDDispatcher

	// RawFactory produces raw endpoints. Raw endpoints are enabled only if
	// this is non-nil.
	RawFactory RawFactory

	// AllowPacketEndpointWrite determines if packet endpoints support write
	// operations.
	AllowPacketEndpointWrite bool

	// RandSource is an optional source to use to generate random
	// numbers. If omitted it defaults to a Source seeded by the data
	// returned by the stack secure RNG.
	//
	// RandSource must be thread-safe.
	RandSource rand.Source

	// IPTables are the initial iptables rules. If nil, DefaultIPTables will be
	// used to construct the initial iptables rules.
	IPTables *IPTables

	// DefaultIPTables is an optional iptables rules constructor that is called
	// if IPTables is nil. If both fields are nil, iptables will allow all
	// traffic.
	DefaultIPTables func(clock tcpip.Clock, rand *rand.Rand) *IPTables

	// SecureRNG is a cryptographically secure random number generator.
	SecureRNG io.Reader
}

// TransportEndpointInfo holds useful information about a transport endpoint
// which can be queried by monitoring tools.
//
// +stateify savable
type TransportEndpointInfo struct {
	// The following fields are initialized at creation time and are
	// immutable.

	NetProto   tcpip.NetworkProtocolNumber
	TransProto tcpip.TransportProtocolNumber

	// The following fields are protected by endpoint mu.

	ID TransportEndpointID
	// BindNICID and BindAddr are set via calls to Bind(). They are used to
	// reject attempts to send data or connect via a different NIC or
	// address.
	BindNICID tcpip.NICID
	BindAddr  tcpip.Address
	// RegisterNICID is the default NICID registered as a side-effect of
	// connect or datagram write.
	RegisterNICID tcpip.NICID
}

// AddrNetProtoLocked unwraps the specified address if it is a V4-mapped V6
// address and returns the network protocol number to be used to communicate
// with the specified address. It returns an error if the passed address is
// incompatible with the receiver.
//
// The checks run in a fixed order, and the first failing one decides the
// error: a mismatch between the address family and the endpoint's local
// address is reported before a mismatch with the endpoint's NetProto.
//
// Precondition: the parent endpoint mu must be held while calling this method.
func (t *TransportEndpointInfo) AddrNetProtoLocked(addr tcpip.FullAddress, v6only bool) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, tcpip.Error) {
	// Start from the endpoint's own protocol and override it based on the
	// size of the address being used.
	netProto := t.NetProto
	switch addr.Addr.BitLen() {
	case header.IPv4AddressSizeBits:
		netProto = header.IPv4ProtocolNumber
	case header.IPv6AddressSizeBits:
		if header.IsV4MappedAddress(addr.Addr) {
			// Keep only the trailing IPv4-sized bytes of the mapped address
			// and talk IPv4 to it.
			netProto = header.IPv4ProtocolNumber
			addr.Addr = tcpip.AddrFrom4Slice(addr.Addr.AsSlice()[header.IPv6AddressSize-header.IPv4AddressSize:])
			// The unwrapped "any" address is normalized to the zero Address.
			if addr.Addr == header.IPv4Any {
				addr.Addr = tcpip.Address{}
			}
		}
	}

	// Reject an address whose (possibly unwrapped) family differs from the
	// family of the endpoint's local address.
	switch t.ID.LocalAddress.BitLen() {
	case header.IPv4AddressSizeBits:
		if addr.Addr.BitLen() == header.IPv6AddressSizeBits {
			return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{}
		}
	case header.IPv6AddressSizeBits:
		if addr.Addr.BitLen() == header.IPv4AddressSizeBits {
			return tcpip.FullAddress{}, 0, &tcpip.ErrNetworkUnreachable{}
		}
	}

	// Finally validate the chosen protocol against the endpoint's protocol.
	switch {
	case netProto == t.NetProto:
	case netProto == header.IPv4ProtocolNumber && t.NetProto == header.IPv6ProtocolNumber:
		// IPv4 over an IPv6 endpoint is only allowed when the endpoint is not
		// restricted to IPv6.
		if v6only {
			return tcpip.FullAddress{}, 0, &tcpip.ErrHostUnreachable{}
		}
	default:
		return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{}
	}

	return addr, netProto, nil
}

// IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo
// marker interface.
321 func (*TransportEndpointInfo) IsEndpointInfo() {} 322 323 // New allocates a new networking stack with only the requested networking and 324 // transport protocols configured with default options. 325 // 326 // Note, NDPConfigurations will be fixed before being used by the Stack. That 327 // is, if an invalid value was provided, it will be reset to the default value. 328 // 329 // Protocol options can be changed by calling the 330 // SetNetworkProtocolOption/SetTransportProtocolOption methods provided by the 331 // stack. Please refer to individual protocol implementations as to what options 332 // are supported. 333 func New(opts Options) *Stack { 334 clock := opts.Clock 335 if clock == nil { 336 clock = tcpip.NewStdClock() 337 } 338 339 if opts.UniqueID == nil { 340 opts.UniqueID = new(uniqueIDGenerator) 341 } 342 343 if opts.SecureRNG == nil { 344 opts.SecureRNG = cryptorand.Reader 345 } 346 347 randSrc := opts.RandSource 348 if randSrc == nil { 349 var v int64 350 if err := binary.Read(opts.SecureRNG, binary.LittleEndian, &v); err != nil { 351 panic(err) 352 } 353 // Source provided by rand.NewSource is not thread-safe so 354 // we wrap it in a simple thread-safe version. 
355 randSrc = &lockedRandomSource{src: rand.NewSource(v)} 356 } 357 randomGenerator := rand.New(randSrc) 358 359 if opts.IPTables == nil { 360 if opts.DefaultIPTables == nil { 361 opts.DefaultIPTables = DefaultTables 362 } 363 opts.IPTables = opts.DefaultIPTables(clock, randomGenerator) 364 } 365 366 opts.NUDConfigs.resetInvalidFields() 367 368 s := &Stack{ 369 transportProtocols: make(map[tcpip.TransportProtocolNumber]*transportProtocolState), 370 networkProtocols: make(map[tcpip.NetworkProtocolNumber]NetworkProtocol), 371 nics: make(map[tcpip.NICID]*nic), 372 packetEndpointWriteSupported: opts.AllowPacketEndpointWrite, 373 defaultForwardingEnabled: make(map[tcpip.NetworkProtocolNumber]struct{}), 374 cleanupEndpoints: make(map[TransportEndpoint]struct{}), 375 PortManager: ports.NewPortManager(), 376 clock: clock, 377 stats: opts.Stats.FillIn(), 378 handleLocal: opts.HandleLocal, 379 tables: opts.IPTables, 380 icmpRateLimiter: NewICMPRateLimiter(clock), 381 seed: randomGenerator.Uint32(), 382 nudConfigs: opts.NUDConfigs, 383 uniqueIDGenerator: opts.UniqueID, 384 nudDisp: opts.NUDDisp, 385 randomGenerator: randomGenerator, 386 secureRNG: opts.SecureRNG, 387 sendBufferSize: tcpip.SendBufferSizeOption{ 388 Min: MinBufferSize, 389 Default: DefaultBufferSize, 390 Max: DefaultMaxBufferSize, 391 }, 392 receiveBufferSize: tcpip.ReceiveBufferSizeOption{ 393 Min: MinBufferSize, 394 Default: DefaultBufferSize, 395 Max: DefaultMaxBufferSize, 396 }, 397 tcpInvalidRateLimit: defaultTCPInvalidRateLimit, 398 tsOffsetSecret: randomGenerator.Uint32(), 399 } 400 401 // Add specified network protocols. 402 for _, netProtoFactory := range opts.NetworkProtocols { 403 netProto := netProtoFactory(s) 404 s.networkProtocols[netProto.Number()] = netProto 405 } 406 407 // Add specified transport protocols. 
408 for _, transProtoFactory := range opts.TransportProtocols { 409 transProto := transProtoFactory(s) 410 s.transportProtocols[transProto.Number()] = &transportProtocolState{ 411 proto: transProto, 412 } 413 } 414 415 // Add the factory for raw endpoints, if present. 416 s.rawFactory = opts.RawFactory 417 418 // Create the global transport demuxer. 419 s.demux = newTransportDemuxer(s) 420 421 return s 422 } 423 424 // UniqueID returns a unique identifier. 425 func (s *Stack) UniqueID() uint64 { 426 return s.uniqueIDGenerator.UniqueID() 427 } 428 429 // SetNetworkProtocolOption allows configuring individual protocol level 430 // options. This method returns an error if the protocol is not supported or 431 // option is not supported by the protocol implementation or the provided value 432 // is incorrect. 433 func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.SettableNetworkProtocolOption) tcpip.Error { 434 netProto, ok := s.networkProtocols[network] 435 if !ok { 436 return &tcpip.ErrUnknownProtocol{} 437 } 438 return netProto.SetOption(option) 439 } 440 441 // NetworkProtocolOption allows retrieving individual protocol level option 442 // values. This method returns an error if the protocol is not supported or 443 // option is not supported by the protocol implementation. E.g.: 444 // 445 // var v ipv4.MyOption 446 // err := s.NetworkProtocolOption(tcpip.IPv4ProtocolNumber, &v) 447 // if err != nil { 448 // ... 449 // } 450 func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.GettableNetworkProtocolOption) tcpip.Error { 451 netProto, ok := s.networkProtocols[network] 452 if !ok { 453 return &tcpip.ErrUnknownProtocol{} 454 } 455 return netProto.Option(option) 456 } 457 458 // SetTransportProtocolOption allows configuring individual protocol level 459 // options. 
This method returns an error if the protocol is not supported or 460 // option is not supported by the protocol implementation or the provided value 461 // is incorrect. 462 func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.SettableTransportProtocolOption) tcpip.Error { 463 transProtoState, ok := s.transportProtocols[transport] 464 if !ok { 465 return &tcpip.ErrUnknownProtocol{} 466 } 467 return transProtoState.proto.SetOption(option) 468 } 469 470 // TransportProtocolOption allows retrieving individual protocol level option 471 // values. This method returns an error if the protocol is not supported or 472 // option is not supported by the protocol implementation. 473 // 474 // var v tcp.SACKEnabled 475 // if err := s.TransportProtocolOption(tcpip.TCPProtocolNumber, &v); err != nil { 476 // ... 477 // } 478 func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.GettableTransportProtocolOption) tcpip.Error { 479 transProtoState, ok := s.transportProtocols[transport] 480 if !ok { 481 return &tcpip.ErrUnknownProtocol{} 482 } 483 return transProtoState.proto.Option(option) 484 } 485 486 // SetTransportProtocolHandler sets the per-stack default handler for the given 487 // protocol. 488 // 489 // It must be called only during initialization of the stack. Changing it as the 490 // stack is operating is not supported. 491 func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(TransportEndpointID, PacketBufferPtr) bool) { 492 state := s.transportProtocols[p] 493 if state != nil { 494 state.defaultHandler = h 495 } 496 } 497 498 // Clock returns the Stack's clock for retrieving the current time and 499 // scheduling work. 500 func (s *Stack) Clock() tcpip.Clock { 501 return s.clock 502 } 503 504 // Stats returns a mutable copy of the current stats. 505 // 506 // This is not generally exported via the public interface, but is available 507 // internally. 
508 func (s *Stack) Stats() tcpip.Stats { 509 return s.stats 510 } 511 512 // SetNICForwarding enables or disables packet forwarding on the specified NIC 513 // for the passed protocol. 514 // 515 // Returns the previous configuration on the NIC. 516 func (s *Stack) SetNICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) { 517 s.mu.RLock() 518 defer s.mu.RUnlock() 519 520 nic, ok := s.nics[id] 521 if !ok { 522 return false, &tcpip.ErrUnknownNICID{} 523 } 524 525 return nic.setForwarding(protocol, enable) 526 } 527 528 // NICForwarding returns the forwarding configuration for the specified NIC. 529 func (s *Stack) NICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) { 530 s.mu.RLock() 531 defer s.mu.RUnlock() 532 533 nic, ok := s.nics[id] 534 if !ok { 535 return false, &tcpip.ErrUnknownNICID{} 536 } 537 538 return nic.forwarding(protocol) 539 } 540 541 // SetForwardingDefaultAndAllNICs sets packet forwarding for all NICs for the 542 // passed protocol and sets the default setting for newly created NICs. 543 func (s *Stack) SetForwardingDefaultAndAllNICs(protocol tcpip.NetworkProtocolNumber, enable bool) tcpip.Error { 544 s.mu.Lock() 545 defer s.mu.Unlock() 546 547 doneOnce := false 548 for id, nic := range s.nics { 549 if _, err := nic.setForwarding(protocol, enable); err != nil { 550 // Expect forwarding to be settable on all interfaces if it was set on 551 // one. 552 if doneOnce { 553 panic(fmt.Sprintf("nic(id=%d).setForwarding(%d, %t): %s", id, protocol, enable, err)) 554 } 555 556 return err 557 } 558 559 doneOnce = true 560 } 561 562 if enable { 563 s.defaultForwardingEnabled[protocol] = struct{}{} 564 } else { 565 delete(s.defaultForwardingEnabled, protocol) 566 } 567 568 return nil 569 } 570 571 // AddMulticastRoute adds a multicast route to be used for the specified 572 // addresses and protocol. 
573 func (s *Stack) AddMulticastRoute(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination, route MulticastRoute) tcpip.Error { 574 netProto, ok := s.networkProtocols[protocol] 575 if !ok { 576 return &tcpip.ErrUnknownProtocol{} 577 } 578 579 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 580 if !ok { 581 return &tcpip.ErrNotSupported{} 582 } 583 584 return forwardingNetProto.AddMulticastRoute(addresses, route) 585 } 586 587 // RemoveMulticastRoute removes a multicast route that matches the specified 588 // addresses and protocol. 589 func (s *Stack) RemoveMulticastRoute(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination) tcpip.Error { 590 netProto, ok := s.networkProtocols[protocol] 591 if !ok { 592 return &tcpip.ErrUnknownProtocol{} 593 } 594 595 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 596 if !ok { 597 return &tcpip.ErrNotSupported{} 598 } 599 600 return forwardingNetProto.RemoveMulticastRoute(addresses) 601 } 602 603 // MulticastRouteLastUsedTime returns a monotonic timestamp that represents the 604 // last time that the route that matches the provided addresses and protocol 605 // was used or updated. 606 func (s *Stack) MulticastRouteLastUsedTime(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination) (tcpip.MonotonicTime, tcpip.Error) { 607 netProto, ok := s.networkProtocols[protocol] 608 if !ok { 609 return tcpip.MonotonicTime{}, &tcpip.ErrUnknownProtocol{} 610 } 611 612 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 613 if !ok { 614 return tcpip.MonotonicTime{}, &tcpip.ErrNotSupported{} 615 } 616 617 return forwardingNetProto.MulticastRouteLastUsedTime(addresses) 618 } 619 620 // EnableMulticastForwardingForProtocol enables multicast forwarding for the 621 // provided protocol. 622 // 623 // Returns true if forwarding was already enabled on the protocol. 
624 // Additionally, returns an error if: 625 // 626 // - The protocol is not found. 627 // - The protocol doesn't support multicast forwarding. 628 // - The multicast forwarding event dispatcher is nil. 629 // 630 // If successful, future multicast forwarding events will be sent to the 631 // provided event dispatcher. 632 func (s *Stack) EnableMulticastForwardingForProtocol(protocol tcpip.NetworkProtocolNumber, disp MulticastForwardingEventDispatcher) (bool, tcpip.Error) { 633 netProto, ok := s.networkProtocols[protocol] 634 if !ok { 635 return false, &tcpip.ErrUnknownProtocol{} 636 } 637 638 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 639 if !ok { 640 return false, &tcpip.ErrNotSupported{} 641 } 642 643 return forwardingNetProto.EnableMulticastForwarding(disp) 644 } 645 646 // DisableMulticastForwardingForProtocol disables multicast forwarding for the 647 // provided protocol. 648 // 649 // Returns an error if the provided protocol is not found or if it does not 650 // support multicast forwarding. 651 func (s *Stack) DisableMulticastForwardingForProtocol(protocol tcpip.NetworkProtocolNumber) tcpip.Error { 652 netProto, ok := s.networkProtocols[protocol] 653 if !ok { 654 return &tcpip.ErrUnknownProtocol{} 655 } 656 657 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 658 if !ok { 659 return &tcpip.ErrNotSupported{} 660 } 661 662 forwardingNetProto.DisableMulticastForwarding() 663 return nil 664 } 665 666 // SetNICMulticastForwarding enables or disables multicast packet forwarding on 667 // the specified NIC for the passed protocol. 668 // 669 // Returns the previous configuration on the NIC. 670 // 671 // TODO(https://gvisor.dev/issue/7338): Implement support for multicast 672 // forwarding. Currently, setting this value is a no-op and is not ready for 673 // use. 
674 func (s *Stack) SetNICMulticastForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) { 675 s.mu.RLock() 676 defer s.mu.RUnlock() 677 678 nic, ok := s.nics[id] 679 if !ok { 680 return false, &tcpip.ErrUnknownNICID{} 681 } 682 683 return nic.setMulticastForwarding(protocol, enable) 684 } 685 686 // NICMulticastForwarding returns the multicast forwarding configuration for 687 // the specified NIC. 688 func (s *Stack) NICMulticastForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) { 689 s.mu.RLock() 690 defer s.mu.RUnlock() 691 692 nic, ok := s.nics[id] 693 if !ok { 694 return false, &tcpip.ErrUnknownNICID{} 695 } 696 697 return nic.multicastForwarding(protocol) 698 } 699 700 // PortRange returns the UDP and TCP inclusive range of ephemeral ports used in 701 // both IPv4 and IPv6. 702 func (s *Stack) PortRange() (uint16, uint16) { 703 return s.PortManager.PortRange() 704 } 705 706 // SetPortRange sets the UDP and TCP IPv4 and IPv6 ephemeral port range 707 // (inclusive). 708 func (s *Stack) SetPortRange(start uint16, end uint16) tcpip.Error { 709 return s.PortManager.SetPortRange(start, end) 710 } 711 712 // GROTimeout returns the GRO timeout. 713 func (s *Stack) GROTimeout(nicID tcpip.NICID) (time.Duration, tcpip.Error) { 714 s.mu.RLock() 715 defer s.mu.RUnlock() 716 717 nic, ok := s.nics[nicID] 718 if !ok { 719 return 0, &tcpip.ErrUnknownNICID{} 720 } 721 722 return nic.gro.getInterval(), nil 723 } 724 725 // SetGROTimeout sets the GRO timeout. 726 func (s *Stack) SetGROTimeout(nicID tcpip.NICID, timeout time.Duration) tcpip.Error { 727 s.mu.RLock() 728 defer s.mu.RUnlock() 729 730 nic, ok := s.nics[nicID] 731 if !ok { 732 return &tcpip.ErrUnknownNICID{} 733 } 734 735 nic.gro.setInterval(timeout) 736 return nil 737 } 738 739 // SetRouteTable assigns the route table to be used by this stack. It 740 // specifies which NIC to use for given destination address ranges. 
741 // 742 // This method takes ownership of the table. 743 func (s *Stack) SetRouteTable(table []tcpip.Route) { 744 s.routeMu.Lock() 745 defer s.routeMu.Unlock() 746 s.routeTable = table 747 } 748 749 // GetRouteTable returns the route table which is currently in use. 750 func (s *Stack) GetRouteTable() []tcpip.Route { 751 s.routeMu.RLock() 752 defer s.routeMu.RUnlock() 753 return append([]tcpip.Route(nil), s.routeTable...) 754 } 755 756 // AddRoute appends a route to the route table. 757 func (s *Stack) AddRoute(route tcpip.Route) { 758 s.routeMu.Lock() 759 defer s.routeMu.Unlock() 760 s.routeTable = append(s.routeTable, route) 761 } 762 763 // RemoveRoutes removes matching routes from the route table. 764 func (s *Stack) RemoveRoutes(match func(tcpip.Route) bool) { 765 s.routeMu.Lock() 766 defer s.routeMu.Unlock() 767 768 var filteredRoutes []tcpip.Route 769 for _, route := range s.routeTable { 770 if !match(route) { 771 filteredRoutes = append(filteredRoutes, route) 772 } 773 } 774 s.routeTable = filteredRoutes 775 } 776 777 // NewEndpoint creates a new transport layer endpoint of the given protocol. 778 func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { 779 t, ok := s.transportProtocols[transport] 780 if !ok { 781 return nil, &tcpip.ErrUnknownProtocol{} 782 } 783 784 return t.proto.NewEndpoint(network, waiterQueue) 785 } 786 787 // NewRawEndpoint creates a new raw transport layer endpoint of the given 788 // protocol. Raw endpoints receive all traffic for a given protocol regardless 789 // of address. 790 func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, tcpip.Error) { 791 if s.rawFactory == nil { 792 netRawMissingLogger.Infof("A process tried to create a raw socket, but --net-raw was not specified. 
Should runsc be run with --net-raw?") 793 return nil, &tcpip.ErrNotPermitted{} 794 } 795 796 if !associated { 797 return s.rawFactory.NewUnassociatedEndpoint(s, network, transport, waiterQueue) 798 } 799 800 t, ok := s.transportProtocols[transport] 801 if !ok { 802 return nil, &tcpip.ErrUnknownProtocol{} 803 } 804 805 return t.proto.NewRawEndpoint(network, waiterQueue) 806 } 807 808 // NewPacketEndpoint creates a new packet endpoint listening for the given 809 // netProto. 810 func (s *Stack) NewPacketEndpoint(cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { 811 if s.rawFactory == nil { 812 return nil, &tcpip.ErrNotPermitted{} 813 } 814 815 return s.rawFactory.NewPacketEndpoint(s, cooked, netProto, waiterQueue) 816 } 817 818 // NICContext is an opaque pointer used to store client-supplied NIC metadata. 819 type NICContext any 820 821 // NICOptions specifies the configuration of a NIC as it is being created. 822 // The zero value creates an enabled, unnamed NIC. 823 type NICOptions struct { 824 // Name specifies the name of the NIC. 825 Name string 826 827 // Disabled specifies whether to avoid calling Attach on the passed 828 // LinkEndpoint. 829 Disabled bool 830 831 // Context specifies user-defined data that will be returned in stack.NICInfo 832 // for the NIC. Clients of this library can use it to add metadata that 833 // should be tracked alongside a NIC, to avoid having to keep a 834 // map[tcpip.NICID]metadata mirroring stack.Stack's nic map. 835 Context NICContext 836 837 // QDisc is the queue discipline to use for this NIC. 838 QDisc QueueingDiscipline 839 840 // GROTimeout specifies the GRO timeout. Zero bypasses GRO. 841 GROTimeout time.Duration 842 } 843 844 // CreateNICWithOptions creates a NIC with the provided id, LinkEndpoint, and 845 // NICOptions. See the documentation on type NICOptions for details on how 846 // NICs can be configured. 
847 // 848 // LinkEndpoint.Attach will be called to bind ep with a NetworkDispatcher. 849 func (s *Stack) CreateNICWithOptions(id tcpip.NICID, ep LinkEndpoint, opts NICOptions) tcpip.Error { 850 s.mu.Lock() 851 defer s.mu.Unlock() 852 853 // Make sure id is unique. 854 if _, ok := s.nics[id]; ok { 855 return &tcpip.ErrDuplicateNICID{} 856 } 857 858 // Make sure name is unique, unless unnamed. 859 if opts.Name != "" { 860 for _, n := range s.nics { 861 if n.Name() == opts.Name { 862 return &tcpip.ErrDuplicateNICID{} 863 } 864 } 865 } 866 867 n := newNIC(s, id, ep, opts) 868 for proto := range s.defaultForwardingEnabled { 869 if _, err := n.setForwarding(proto, true); err != nil { 870 panic(fmt.Sprintf("newNIC(%d, ...).setForwarding(%d, true): %s", id, proto, err)) 871 } 872 } 873 s.nics[id] = n 874 if !opts.Disabled { 875 return n.enable() 876 } 877 878 return nil 879 } 880 881 // CreateNIC creates a NIC with the provided id and LinkEndpoint and calls 882 // LinkEndpoint.Attach to bind ep with a NetworkDispatcher. 883 func (s *Stack) CreateNIC(id tcpip.NICID, ep LinkEndpoint) tcpip.Error { 884 return s.CreateNICWithOptions(id, ep, NICOptions{}) 885 } 886 887 // GetLinkEndpointByName gets the link endpoint specified by name. 888 func (s *Stack) GetLinkEndpointByName(name string) LinkEndpoint { 889 s.mu.RLock() 890 defer s.mu.RUnlock() 891 for _, nic := range s.nics { 892 if nic.Name() == name { 893 linkEP, ok := nic.NetworkLinkEndpoint.(LinkEndpoint) 894 if !ok { 895 panic(fmt.Sprintf("unexpected NetworkLinkEndpoint(%#v) is not a LinkEndpoint", nic.NetworkLinkEndpoint)) 896 } 897 return linkEP 898 } 899 } 900 return nil 901 } 902 903 // EnableNIC enables the given NIC so that the link-layer endpoint can start 904 // delivering packets to it. 
905 func (s *Stack) EnableNIC(id tcpip.NICID) tcpip.Error { 906 s.mu.RLock() 907 defer s.mu.RUnlock() 908 909 nic, ok := s.nics[id] 910 if !ok { 911 return &tcpip.ErrUnknownNICID{} 912 } 913 914 return nic.enable() 915 } 916 917 // DisableNIC disables the given NIC. 918 func (s *Stack) DisableNIC(id tcpip.NICID) tcpip.Error { 919 s.mu.RLock() 920 defer s.mu.RUnlock() 921 922 nic, ok := s.nics[id] 923 if !ok { 924 return &tcpip.ErrUnknownNICID{} 925 } 926 927 nic.disable() 928 return nil 929 } 930 931 // CheckNIC checks if a NIC is usable. 932 func (s *Stack) CheckNIC(id tcpip.NICID) bool { 933 s.mu.RLock() 934 defer s.mu.RUnlock() 935 936 nic, ok := s.nics[id] 937 if !ok { 938 return false 939 } 940 941 return nic.Enabled() 942 } 943 944 // RemoveNIC removes NIC and all related routes from the network stack. 945 func (s *Stack) RemoveNIC(id tcpip.NICID) tcpip.Error { 946 s.mu.Lock() 947 defer s.mu.Unlock() 948 949 return s.removeNICLocked(id) 950 } 951 952 // removeNICLocked removes NIC and all related routes from the network stack. 953 // 954 // +checklocks:s.mu 955 func (s *Stack) removeNICLocked(id tcpip.NICID) tcpip.Error { 956 nic, ok := s.nics[id] 957 if !ok { 958 return &tcpip.ErrUnknownNICID{} 959 } 960 delete(s.nics, id) 961 962 // Remove routes in-place. n tracks the number of routes written. 963 s.routeMu.Lock() 964 n := 0 965 for i, r := range s.routeTable { 966 s.routeTable[i] = tcpip.Route{} 967 if r.NIC != id { 968 // Keep this route. 969 s.routeTable[n] = r 970 n++ 971 } 972 } 973 s.routeTable = s.routeTable[:n] 974 s.routeMu.Unlock() 975 976 return nic.remove() 977 } 978 979 // NICInfo captures the name and addresses assigned to a NIC. 980 type NICInfo struct { 981 Name string 982 LinkAddress tcpip.LinkAddress 983 ProtocolAddresses []tcpip.ProtocolAddress 984 985 // Flags indicate the state of the NIC. 986 Flags NICStateFlags 987 988 // MTU is the maximum transmission unit. 
989 MTU uint32 990 991 Stats tcpip.NICStats 992 993 // NetworkStats holds the stats of each NetworkEndpoint bound to the NIC. 994 NetworkStats map[tcpip.NetworkProtocolNumber]NetworkEndpointStats 995 996 // Context is user-supplied data optionally supplied in CreateNICWithOptions. 997 // See type NICOptions for more details. 998 Context NICContext 999 1000 // ARPHardwareType holds the ARP Hardware type of the NIC. This is the 1001 // value sent in haType field of an ARP Request sent by this NIC and the 1002 // value expected in the haType field of an ARP response. 1003 ARPHardwareType header.ARPHardwareType 1004 1005 // Forwarding holds the forwarding status for each network endpoint that 1006 // supports forwarding. 1007 Forwarding map[tcpip.NetworkProtocolNumber]bool 1008 1009 // MulticastForwarding holds the forwarding status for each network endpoint 1010 // that supports multicast forwarding. 1011 MulticastForwarding map[tcpip.NetworkProtocolNumber]bool 1012 } 1013 1014 // HasNIC returns true if the NICID is defined in the stack. 1015 func (s *Stack) HasNIC(id tcpip.NICID) bool { 1016 s.mu.RLock() 1017 _, ok := s.nics[id] 1018 s.mu.RUnlock() 1019 return ok 1020 } 1021 1022 // NICInfo returns a map of NICIDs to their associated information. 1023 func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo { 1024 s.mu.RLock() 1025 defer s.mu.RUnlock() 1026 1027 type forwardingFn func(tcpip.NetworkProtocolNumber) (bool, tcpip.Error) 1028 forwardingValue := func(forwardingFn forwardingFn, proto tcpip.NetworkProtocolNumber, nicID tcpip.NICID, fnName string) (forward bool, ok bool) { 1029 switch forwarding, err := forwardingFn(proto); err.(type) { 1030 case nil: 1031 return forwarding, true 1032 case *tcpip.ErrUnknownProtocol: 1033 panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nicID)) 1034 case *tcpip.ErrNotSupported: 1035 // Not all network protocols support forwarding. 
1036 default: 1037 panic(fmt.Sprintf("nic(id=%d).%s(%d): %s", nicID, fnName, proto, err)) 1038 } 1039 return false, false 1040 } 1041 1042 nics := make(map[tcpip.NICID]NICInfo) 1043 for id, nic := range s.nics { 1044 flags := NICStateFlags{ 1045 Up: true, // Netstack interfaces are always up. 1046 Running: nic.Enabled(), 1047 Promiscuous: nic.Promiscuous(), 1048 Loopback: nic.IsLoopback(), 1049 } 1050 1051 netStats := make(map[tcpip.NetworkProtocolNumber]NetworkEndpointStats) 1052 for proto, netEP := range nic.networkEndpoints { 1053 netStats[proto] = netEP.Stats() 1054 } 1055 1056 info := NICInfo{ 1057 Name: nic.name, 1058 LinkAddress: nic.NetworkLinkEndpoint.LinkAddress(), 1059 ProtocolAddresses: nic.primaryAddresses(), 1060 Flags: flags, 1061 MTU: nic.NetworkLinkEndpoint.MTU(), 1062 Stats: nic.stats.local, 1063 NetworkStats: netStats, 1064 Context: nic.context, 1065 ARPHardwareType: nic.NetworkLinkEndpoint.ARPHardwareType(), 1066 Forwarding: make(map[tcpip.NetworkProtocolNumber]bool), 1067 MulticastForwarding: make(map[tcpip.NetworkProtocolNumber]bool), 1068 } 1069 1070 for proto := range s.networkProtocols { 1071 if forwarding, ok := forwardingValue(nic.forwarding, proto, id, "forwarding"); ok { 1072 info.Forwarding[proto] = forwarding 1073 } 1074 1075 if multicastForwarding, ok := forwardingValue(nic.multicastForwarding, proto, id, "multicastForwarding"); ok { 1076 info.MulticastForwarding[proto] = multicastForwarding 1077 } 1078 } 1079 1080 nics[id] = info 1081 } 1082 return nics 1083 } 1084 1085 // NICStateFlags holds information about the state of an NIC. 1086 type NICStateFlags struct { 1087 // Up indicates whether the interface is running. 1088 Up bool 1089 1090 // Running indicates whether resources are allocated. 1091 Running bool 1092 1093 // Promiscuous indicates whether the interface is in promiscuous mode. 1094 Promiscuous bool 1095 1096 // Loopback indicates whether the interface is a loopback. 
1097 Loopback bool 1098 } 1099 1100 // AddProtocolAddress adds an address to the specified NIC, possibly with extra 1101 // properties. 1102 func (s *Stack) AddProtocolAddress(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress, properties AddressProperties) tcpip.Error { 1103 s.mu.RLock() 1104 defer s.mu.RUnlock() 1105 1106 nic, ok := s.nics[id] 1107 if !ok { 1108 return &tcpip.ErrUnknownNICID{} 1109 } 1110 1111 return nic.addAddress(protocolAddress, properties) 1112 } 1113 1114 // RemoveAddress removes an existing network-layer address from the specified 1115 // NIC. 1116 func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) tcpip.Error { 1117 s.mu.RLock() 1118 defer s.mu.RUnlock() 1119 1120 if nic, ok := s.nics[id]; ok { 1121 return nic.removeAddress(addr) 1122 } 1123 1124 return &tcpip.ErrUnknownNICID{} 1125 } 1126 1127 // SetAddressLifetimes sets informational preferred and valid lifetimes, and 1128 // whether the address should be preferred or deprecated. 1129 func (s *Stack) SetAddressLifetimes(id tcpip.NICID, addr tcpip.Address, lifetimes AddressLifetimes) tcpip.Error { 1130 s.mu.RLock() 1131 defer s.mu.RUnlock() 1132 1133 if nic, ok := s.nics[id]; ok { 1134 return nic.setAddressLifetimes(addr, lifetimes) 1135 } 1136 1137 return &tcpip.ErrUnknownNICID{} 1138 } 1139 1140 // AllAddresses returns a map of NICIDs to their protocol addresses (primary 1141 // and non-primary). 1142 func (s *Stack) AllAddresses() map[tcpip.NICID][]tcpip.ProtocolAddress { 1143 s.mu.RLock() 1144 defer s.mu.RUnlock() 1145 1146 nics := make(map[tcpip.NICID][]tcpip.ProtocolAddress) 1147 for id, nic := range s.nics { 1148 nics[id] = nic.allPermanentAddresses() 1149 } 1150 return nics 1151 } 1152 1153 // GetMainNICAddress returns the first non-deprecated primary address and prefix 1154 // for the given NIC and protocol. If no non-deprecated primary addresses exist, 1155 // a deprecated address will be returned. 
If no deprecated addresses exist, the 1156 // zero value will be returned. 1157 func (s *Stack) GetMainNICAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error) { 1158 s.mu.RLock() 1159 defer s.mu.RUnlock() 1160 1161 nic, ok := s.nics[id] 1162 if !ok { 1163 return tcpip.AddressWithPrefix{}, &tcpip.ErrUnknownNICID{} 1164 } 1165 1166 return nic.PrimaryAddress(protocol) 1167 } 1168 1169 func (s *Stack) getAddressEP(nic *nic, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) AssignableAddressEndpoint { 1170 if localAddr.BitLen() == 0 { 1171 return nic.primaryEndpoint(netProto, remoteAddr) 1172 } 1173 return nic.findEndpoint(netProto, localAddr, CanBePrimaryEndpoint) 1174 } 1175 1176 // NewRouteForMulticast returns a Route that may be used to forward multicast 1177 // packets. 1178 // 1179 // Returns nil if validation fails. 1180 func (s *Stack) NewRouteForMulticast(nicID tcpip.NICID, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { 1181 s.mu.RLock() 1182 defer s.mu.RUnlock() 1183 1184 nic, ok := s.nics[nicID] 1185 if !ok || !nic.Enabled() { 1186 return nil 1187 } 1188 1189 if addressEndpoint := s.getAddressEP(nic, tcpip.Address{} /* localAddr */, remoteAddr, netProto); addressEndpoint != nil { 1190 return constructAndValidateRoute(netProto, addressEndpoint, nic, nic, tcpip.Address{} /* gateway */, tcpip.Address{} /* localAddr */, remoteAddr, s.handleLocal, false /* multicastLoop */) 1191 } 1192 return nil 1193 } 1194 1195 // findLocalRouteFromNICRLocked is like findLocalRouteRLocked but finds a route 1196 // from the specified NIC. 
1197 // 1198 // +checklocksread:s.mu 1199 func (s *Stack) findLocalRouteFromNICRLocked(localAddressNIC *nic, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { 1200 localAddressEndpoint := localAddressNIC.getAddressOrCreateTempInner(netProto, localAddr, false /* createTemp */, NeverPrimaryEndpoint) 1201 if localAddressEndpoint == nil { 1202 return nil 1203 } 1204 1205 var outgoingNIC *nic 1206 // Prefer a local route to the same interface as the local address. 1207 if localAddressNIC.hasAddress(netProto, remoteAddr) { 1208 outgoingNIC = localAddressNIC 1209 } 1210 1211 // If the remote address isn't owned by the local address's NIC, check all 1212 // NICs. 1213 if outgoingNIC == nil { 1214 for _, nic := range s.nics { 1215 if nic.hasAddress(netProto, remoteAddr) { 1216 outgoingNIC = nic 1217 break 1218 } 1219 } 1220 } 1221 1222 // If the remote address is not owned by the stack, we can't return a local 1223 // route. 1224 if outgoingNIC == nil { 1225 localAddressEndpoint.DecRef() 1226 return nil 1227 } 1228 1229 r := makeLocalRoute( 1230 netProto, 1231 localAddr, 1232 remoteAddr, 1233 outgoingNIC, 1234 localAddressNIC, 1235 localAddressEndpoint, 1236 ) 1237 1238 if r.IsOutboundBroadcast() { 1239 r.Release() 1240 return nil 1241 } 1242 1243 return r 1244 } 1245 1246 // findLocalRouteRLocked returns a local route. 1247 // 1248 // A local route is a route to some remote address which the stack owns. That 1249 // is, a local route is a route where packets never have to leave the stack. 
//
// +checklocksread:s.mu
func (s *Stack) findLocalRouteRLocked(localAddressNICID tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route {
	// With no local address given, use the remote address as the local one.
	if localAddr.BitLen() == 0 {
		localAddr = remoteAddr
	}

	// A zero NIC ID means "any NIC": return the first local route found.
	if localAddressNICID == 0 {
		for _, localAddressNIC := range s.nics {
			if r := s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto); r != nil {
				return r
			}
		}

		return nil
	}

	// Otherwise only the requested NIC is considered; an unknown NIC ID
	// yields no route.
	if localAddressNIC, ok := s.nics[localAddressNICID]; ok {
		return s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto)
	}

	return nil
}

// HandleLocal returns true if non-loopback interfaces are allowed to loop packets.
func (s *Stack) HandleLocal() bool {
	return s.handleLocal
}

// isNICForwarding reports whether forwarding is enabled on nic for proto.
//
// A protocol that does not support forwarding is reported as not forwarding;
// any other error from nic.forwarding panics.
func isNICForwarding(nic *nic, proto tcpip.NetworkProtocolNumber) bool {
	switch forwarding, err := nic.forwarding(proto); err.(type) {
	case nil:
		return forwarding
	case *tcpip.ErrUnknownProtocol:
		panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nic.ID()))
	case *tcpip.ErrNotSupported:
		// Not all network protocols support forwarding.
		return false
	default:
		panic(fmt.Sprintf("nic(id=%d).forwarding(%d): %s", nic.ID(), proto, err))
	}
}

// FindRoute creates a route to the given destination address, leaving through
// the given NIC and local address (if provided).
//
// If a NIC is not specified, the returned route will leave through the same
// NIC as the NIC that has the local address assigned when forwarding is
// disabled. If forwarding is enabled and the NIC is unspecified, the route may
// leave through any interface unless the route is link-local.
//
// If no local address is provided, the stack will select a local address. If no
// remote address is provided, the stack will use a remote address equal to the
// local address.
func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) (*Route, tcpip.Error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// Classify the destination. needRoute is false for local broadcast,
	// multicast, link-local and loopback destinations; for those, no gateway
	// from the route table is used below.
	isLinkLocal := header.IsV6LinkLocalUnicastAddress(remoteAddr) || header.IsV6LinkLocalMulticastAddress(remoteAddr)
	isLocalBroadcast := remoteAddr == header.IPv4Broadcast
	isMulticast := header.IsV4MulticastAddress(remoteAddr) || header.IsV6MulticastAddress(remoteAddr)
	isLoopback := header.IsV4LoopbackAddress(remoteAddr) || header.IsV6LoopbackAddress(remoteAddr)
	needRoute := !(isLocalBroadcast || isMulticast || isLinkLocal || isLoopback)

	// When local handling is enabled, first try a route that never leaves the
	// stack (not attempted for multicast or local broadcast destinations).
	if s.handleLocal && !isMulticast && !isLocalBroadcast {
		if r := s.findLocalRouteRLocked(id, localAddr, remoteAddr, netProto); r != nil {
			return r, nil
		}
	}

	// If the interface is specified and we do not need a route, return a route
	// through the interface if the interface is valid and enabled.
	if id != 0 && !needRoute {
		if nic, ok := s.nics[id]; ok && nic.Enabled() {
			if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, netProto); addressEndpoint != nil {
				return makeRoute(
					netProto,
					tcpip.Address{}, /* gateway */
					localAddr,
					remoteAddr,
					nic, /* outboundNIC */
					nic, /* localAddressNIC*/
					addressEndpoint,
					s.handleLocal,
					multicastLoop,
				), nil
			}
		}

		if isLoopback {
			return nil, &tcpip.ErrBadLocalAddress{}
		}
		return nil, &tcpip.ErrNetworkUnreachable{}
	}

	// Forwarding candidates are only tracked when neither the local nor the
	// remote address is link-local.
	onlyGlobalAddresses := !header.IsV6LinkLocalUnicastAddress(localAddr) && !isLinkLocal

	// Find a route to the remote with the route table.
	//
	// The closure scopes the routeMu read lock to the table walk. It returns a
	// route as soon as one can be built with a local address on the outgoing
	// NIC, and records the first forwarding-capable entry in chosenRoute as a
	// fallback.
	var chosenRoute tcpip.Route
	if r := func() *Route {
		s.routeMu.RLock()
		defer s.routeMu.RUnlock()

		for _, route := range s.routeTable {
			// An empty remote address matches every route table entry.
			if remoteAddr.BitLen() != 0 && !route.Destination.Contains(remoteAddr) {
				continue
			}

			// Skip entries whose NIC is gone or disabled.
			nic, ok := s.nics[route.NIC]
			if !ok || !nic.Enabled() {
				continue
			}

			if id == 0 || id == route.NIC {
				if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, netProto); addressEndpoint != nil {
					var gateway tcpip.Address
					if needRoute {
						gateway = route.Gateway
					}
					r := constructAndValidateRoute(netProto, addressEndpoint, nic /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop)
					if r == nil {
						panic(fmt.Sprintf("non-forwarding route validation failed with route table entry = %#v, id = %d, localAddr = %s, remoteAddr = %s", route, id, localAddr, remoteAddr))
					}
					return r
				}
			}

			// If the stack has forwarding enabled and we haven't found a valid route
			// to the remote address yet, keep track of the first valid route. We
			// keep iterating because we prefer routes that let us use a local
			// address that is assigned to the outgoing interface. There is no
			// requirement to do this from any RFC but simply a choice made to better
			// follow a strong host model which the netstack follows at the time of
			// writing.
			if onlyGlobalAddresses && chosenRoute.Equal(tcpip.Route{}) && isNICForwarding(nic, netProto) {
				chosenRoute = route
			}
		}

		return nil
	}(); r != nil {
		return r, nil
	}

	if !chosenRoute.Equal(tcpip.Route{}) {
		// At this point we know the stack has forwarding enabled since chosenRoute is
		// only set when forwarding is enabled.
		nic, ok := s.nics[chosenRoute.NIC]
		if !ok {
			// If the route's NIC was invalid, we should not have chosen the route.
			panic(fmt.Sprintf("chosen route must have a valid NIC with ID = %d", chosenRoute.NIC))
		}

		var gateway tcpip.Address
		if needRoute {
			gateway = chosenRoute.Gateway
		}

		// Use the specified NIC to get the local address endpoint.
		if id != 0 {
			if aNIC, ok := s.nics[id]; ok {
				if addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, netProto); addressEndpoint != nil {
					if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
						return r, nil
					}
				}
			}

			// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
			return nil, &tcpip.ErrHostUnreachable{}
		}

		if id == 0 {
			// If an interface is not specified, try to find a NIC that holds the local
			// address endpoint to construct a route.
			for _, aNIC := range s.nics {
				addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, netProto)
				if addressEndpoint == nil {
					continue
				}

				if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
					return r, nil
				}
			}
		}
	}

	// No route could be constructed; pick the error by destination kind.
	if needRoute {
		// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
		return nil, &tcpip.ErrHostUnreachable{}
	}
	if header.IsV6LoopbackAddress(remoteAddr) {
		return nil, &tcpip.ErrBadLocalAddress{}
	}
	// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
	return nil, &tcpip.ErrNetworkUnreachable{}
}

// CheckNetworkProtocol checks if a given network protocol is enabled in the
// stack.
1451 func (s *Stack) CheckNetworkProtocol(protocol tcpip.NetworkProtocolNumber) bool { 1452 _, ok := s.networkProtocols[protocol] 1453 return ok 1454 } 1455 1456 // CheckDuplicateAddress performs duplicate address detection for the address on 1457 // the specified interface. 1458 func (s *Stack) CheckDuplicateAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, h DADCompletionHandler) (DADCheckAddressDisposition, tcpip.Error) { 1459 s.mu.RLock() 1460 nic, ok := s.nics[nicID] 1461 s.mu.RUnlock() 1462 1463 if !ok { 1464 return 0, &tcpip.ErrUnknownNICID{} 1465 } 1466 1467 return nic.checkDuplicateAddress(protocol, addr, h) 1468 } 1469 1470 // CheckLocalAddress determines if the given local address exists, and if it 1471 // does, returns the id of the NIC it's bound to. Returns 0 if the address 1472 // does not exist. 1473 func (s *Stack) CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID { 1474 s.mu.RLock() 1475 defer s.mu.RUnlock() 1476 1477 // If a NIC is specified, use its NIC id. 1478 if nicID != 0 { 1479 nic, ok := s.nics[nicID] 1480 if !ok { 1481 return 0 1482 } 1483 // In IPv4, linux only checks the interface. If it matches, then it does 1484 // not bother with the address. 1485 // https://github.com/torvalds/linux/blob/15205c2829ca2cbb5ece5ceaafe1171a8470e62b/net/ipv4/igmp.c#L1829-L1837 1486 if protocol == header.IPv4ProtocolNumber { 1487 return nic.id 1488 } 1489 if nic.CheckLocalAddress(protocol, addr) { 1490 return nic.id 1491 } 1492 return 0 1493 } 1494 1495 // Go through all the NICs. 1496 for _, nic := range s.nics { 1497 if nic.CheckLocalAddress(protocol, addr) { 1498 return nic.id 1499 } 1500 } 1501 1502 return 0 1503 } 1504 1505 // SetPromiscuousMode enables or disables promiscuous mode in the given NIC. 
1506 func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) tcpip.Error { 1507 s.mu.RLock() 1508 defer s.mu.RUnlock() 1509 1510 nic, ok := s.nics[nicID] 1511 if !ok { 1512 return &tcpip.ErrUnknownNICID{} 1513 } 1514 1515 nic.setPromiscuousMode(enable) 1516 1517 return nil 1518 } 1519 1520 // SetSpoofing enables or disables address spoofing in the given NIC, allowing 1521 // endpoints to bind to any address in the NIC. 1522 func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) tcpip.Error { 1523 s.mu.RLock() 1524 defer s.mu.RUnlock() 1525 1526 nic, ok := s.nics[nicID] 1527 if !ok { 1528 return &tcpip.ErrUnknownNICID{} 1529 } 1530 1531 nic.setSpoofing(enable) 1532 1533 return nil 1534 } 1535 1536 // LinkResolutionResult is the result of a link address resolution attempt. 1537 type LinkResolutionResult struct { 1538 LinkAddress tcpip.LinkAddress 1539 Err tcpip.Error 1540 } 1541 1542 // GetLinkAddress finds the link address corresponding to a network address. 1543 // 1544 // Returns ErrNotSupported if the stack is not configured with a link address 1545 // resolver for the specified network protocol. 1546 // 1547 // Returns ErrWouldBlock if the link address is not readily available, along 1548 // with a notification channel for the caller to block on. Triggers address 1549 // resolution asynchronously. 1550 // 1551 // onResolve will be called either immediately, if resolution is not required, 1552 // or when address resolution is complete, with the resolved link address and 1553 // whether resolution succeeded. 1554 // 1555 // If specified, the local address must be an address local to the interface 1556 // the neighbor cache belongs to. The local address is the source address of 1557 // a packet prompting NUD/link address resolution. 
1558 func (s *Stack) GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, onResolve func(LinkResolutionResult)) tcpip.Error { 1559 s.mu.RLock() 1560 nic, ok := s.nics[nicID] 1561 s.mu.RUnlock() 1562 if !ok { 1563 return &tcpip.ErrUnknownNICID{} 1564 } 1565 1566 return nic.getLinkAddress(addr, localAddr, protocol, onResolve) 1567 } 1568 1569 // Neighbors returns all IP to MAC address associations. 1570 func (s *Stack) Neighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) ([]NeighborEntry, tcpip.Error) { 1571 s.mu.RLock() 1572 nic, ok := s.nics[nicID] 1573 s.mu.RUnlock() 1574 1575 if !ok { 1576 return nil, &tcpip.ErrUnknownNICID{} 1577 } 1578 1579 return nic.neighbors(protocol) 1580 } 1581 1582 // AddStaticNeighbor statically associates an IP address to a MAC address. 1583 func (s *Stack) AddStaticNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress) tcpip.Error { 1584 s.mu.RLock() 1585 nic, ok := s.nics[nicID] 1586 s.mu.RUnlock() 1587 1588 if !ok { 1589 return &tcpip.ErrUnknownNICID{} 1590 } 1591 1592 return nic.addStaticNeighbor(addr, protocol, linkAddr) 1593 } 1594 1595 // RemoveNeighbor removes an IP to MAC address association previously created 1596 // either automically or by AddStaticNeighbor. Returns ErrBadAddress if there 1597 // is no association with the provided address. 1598 func (s *Stack) RemoveNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error { 1599 s.mu.RLock() 1600 nic, ok := s.nics[nicID] 1601 s.mu.RUnlock() 1602 1603 if !ok { 1604 return &tcpip.ErrUnknownNICID{} 1605 } 1606 1607 return nic.removeNeighbor(protocol, addr) 1608 } 1609 1610 // ClearNeighbors removes all IP to MAC address associations. 
1611 func (s *Stack) ClearNeighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) tcpip.Error { 1612 s.mu.RLock() 1613 nic, ok := s.nics[nicID] 1614 s.mu.RUnlock() 1615 1616 if !ok { 1617 return &tcpip.ErrUnknownNICID{} 1618 } 1619 1620 return nic.clearNeighbors(protocol) 1621 } 1622 1623 // RegisterTransportEndpoint registers the given endpoint with the stack 1624 // transport dispatcher. Received packets that match the provided id will be 1625 // delivered to the given endpoint; specifying a nic is optional, but 1626 // nic-specific IDs have precedence over global ones. 1627 func (s *Stack) RegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { 1628 return s.demux.registerEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) 1629 } 1630 1631 // CheckRegisterTransportEndpoint checks if an endpoint can be registered with 1632 // the stack transport dispatcher. 1633 func (s *Stack) CheckRegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { 1634 return s.demux.checkEndpoint(netProtos, protocol, id, flags, bindToDevice) 1635 } 1636 1637 // UnregisterTransportEndpoint removes the endpoint with the given id from the 1638 // stack transport dispatcher. 1639 func (s *Stack) UnregisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) { 1640 s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) 1641 } 1642 1643 // StartTransportEndpointCleanup removes the endpoint with the given id from 1644 // the stack transport dispatcher. It also transitions it to the cleanup stage. 
1645 func (s *Stack) StartTransportEndpointCleanup(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) { 1646 s.cleanupEndpointsMu.Lock() 1647 s.cleanupEndpoints[ep] = struct{}{} 1648 s.cleanupEndpointsMu.Unlock() 1649 1650 s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) 1651 } 1652 1653 // CompleteTransportEndpointCleanup removes the endpoint from the cleanup 1654 // stage. 1655 func (s *Stack) CompleteTransportEndpointCleanup(ep TransportEndpoint) { 1656 s.cleanupEndpointsMu.Lock() 1657 delete(s.cleanupEndpoints, ep) 1658 s.cleanupEndpointsMu.Unlock() 1659 } 1660 1661 // FindTransportEndpoint finds an endpoint that most closely matches the provided 1662 // id. If no endpoint is found it returns nil. 1663 func (s *Stack) FindTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, nicID tcpip.NICID) TransportEndpoint { 1664 return s.demux.findTransportEndpoint(netProto, transProto, id, nicID) 1665 } 1666 1667 // RegisterRawTransportEndpoint registers the given endpoint with the stack 1668 // transport dispatcher. Received packets that match the provided transport 1669 // protocol will be delivered to the given endpoint. 1670 func (s *Stack) RegisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) tcpip.Error { 1671 return s.demux.registerRawEndpoint(netProto, transProto, ep) 1672 } 1673 1674 // UnregisterRawTransportEndpoint removes the endpoint for the transport 1675 // protocol from the stack transport dispatcher. 
1676 func (s *Stack) UnregisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) { 1677 s.demux.unregisterRawEndpoint(netProto, transProto, ep) 1678 } 1679 1680 // RegisterRestoredEndpoint records e as an endpoint that has been restored on 1681 // this stack. 1682 func (s *Stack) RegisterRestoredEndpoint(e ResumableEndpoint) { 1683 s.mu.Lock() 1684 s.resumableEndpoints = append(s.resumableEndpoints, e) 1685 s.mu.Unlock() 1686 } 1687 1688 // RegisteredEndpoints returns all endpoints which are currently registered. 1689 func (s *Stack) RegisteredEndpoints() []TransportEndpoint { 1690 s.mu.Lock() 1691 defer s.mu.Unlock() 1692 var es []TransportEndpoint 1693 for _, e := range s.demux.protocol { 1694 es = append(es, e.transportEndpoints()...) 1695 } 1696 return es 1697 } 1698 1699 // CleanupEndpoints returns endpoints currently in the cleanup state. 1700 func (s *Stack) CleanupEndpoints() []TransportEndpoint { 1701 s.cleanupEndpointsMu.Lock() 1702 es := make([]TransportEndpoint, 0, len(s.cleanupEndpoints)) 1703 for e := range s.cleanupEndpoints { 1704 es = append(es, e) 1705 } 1706 s.cleanupEndpointsMu.Unlock() 1707 return es 1708 } 1709 1710 // RestoreCleanupEndpoints adds endpoints to cleanup tracking. This is useful 1711 // for restoring a stack after a save. 1712 func (s *Stack) RestoreCleanupEndpoints(es []TransportEndpoint) { 1713 s.cleanupEndpointsMu.Lock() 1714 for _, e := range es { 1715 s.cleanupEndpoints[e] = struct{}{} 1716 } 1717 s.cleanupEndpointsMu.Unlock() 1718 } 1719 1720 // Close closes all currently registered transport endpoints. 1721 // 1722 // Endpoints created or modified during this call may not get closed. 
1723 func (s *Stack) Close() { 1724 for _, e := range s.RegisteredEndpoints() { 1725 e.Abort() 1726 } 1727 for _, p := range s.transportProtocols { 1728 p.proto.Close() 1729 } 1730 for _, p := range s.networkProtocols { 1731 p.Close() 1732 } 1733 } 1734 1735 // Wait waits for all transport and link endpoints to halt their worker 1736 // goroutines. 1737 // 1738 // Endpoints created or modified during this call may not get waited on. 1739 // 1740 // Note that link endpoints must be stopped via an implementation specific 1741 // mechanism. 1742 func (s *Stack) Wait() { 1743 for _, e := range s.RegisteredEndpoints() { 1744 e.Wait() 1745 } 1746 for _, e := range s.CleanupEndpoints() { 1747 e.Wait() 1748 } 1749 for _, p := range s.transportProtocols { 1750 p.proto.Wait() 1751 } 1752 for _, p := range s.networkProtocols { 1753 p.Wait() 1754 } 1755 1756 s.mu.Lock() 1757 defer s.mu.Unlock() 1758 1759 for id, n := range s.nics { 1760 // Remove NIC to ensure that qDisc goroutines are correctly 1761 // terminated on stack teardown. 1762 s.removeNICLocked(id) 1763 n.NetworkLinkEndpoint.Wait() 1764 } 1765 } 1766 1767 // Destroy destroys the stack with all endpoints. 1768 func (s *Stack) Destroy() { 1769 s.Close() 1770 s.Wait() 1771 } 1772 1773 // Pause pauses any protocol level background workers. 1774 func (s *Stack) Pause() { 1775 for _, p := range s.transportProtocols { 1776 p.proto.Pause() 1777 } 1778 } 1779 1780 // Resume restarts the stack after a restore. This must be called after the 1781 // entire system has been restored. 1782 func (s *Stack) Resume() { 1783 // ResumableEndpoint.Resume() may call other methods on s, so we can't hold 1784 // s.mu while resuming the endpoints. 1785 s.mu.Lock() 1786 eps := s.resumableEndpoints 1787 s.resumableEndpoints = nil 1788 s.mu.Unlock() 1789 for _, e := range eps { 1790 e.Resume(s) 1791 } 1792 // Now resume any protocol level background workers. 
1793 for _, p := range s.transportProtocols { 1794 p.proto.Resume() 1795 } 1796 } 1797 1798 // RegisterPacketEndpoint registers ep with the stack, causing it to receive 1799 // all traffic of the specified netProto on the given NIC. If nicID is 0, it 1800 // receives traffic from every NIC. 1801 func (s *Stack) RegisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) tcpip.Error { 1802 s.mu.Lock() 1803 defer s.mu.Unlock() 1804 1805 // If no NIC is specified, capture on all devices. 1806 if nicID == 0 { 1807 // Register with each NIC. 1808 for _, nic := range s.nics { 1809 if err := nic.registerPacketEndpoint(netProto, ep); err != nil { 1810 s.unregisterPacketEndpointLocked(0, netProto, ep) 1811 return err 1812 } 1813 } 1814 return nil 1815 } 1816 1817 // Capture on a specific device. 1818 nic, ok := s.nics[nicID] 1819 if !ok { 1820 return &tcpip.ErrUnknownNICID{} 1821 } 1822 if err := nic.registerPacketEndpoint(netProto, ep); err != nil { 1823 return err 1824 } 1825 1826 return nil 1827 } 1828 1829 // UnregisterPacketEndpoint unregisters ep for packets of the specified 1830 // netProto from the specified NIC. If nicID is 0, ep is unregistered from all 1831 // NICs. 1832 func (s *Stack) UnregisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) { 1833 s.mu.Lock() 1834 defer s.mu.Unlock() 1835 s.unregisterPacketEndpointLocked(nicID, netProto, ep) 1836 } 1837 1838 // +checklocks:s.mu 1839 func (s *Stack) unregisterPacketEndpointLocked(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) { 1840 // If no NIC is specified, unregister on all devices. 1841 if nicID == 0 { 1842 // Unregister with each NIC. 1843 for _, nic := range s.nics { 1844 nic.unregisterPacketEndpoint(netProto, ep) 1845 } 1846 return 1847 } 1848 1849 // Unregister in a single device. 
1850 nic, ok := s.nics[nicID] 1851 if !ok { 1852 return 1853 } 1854 nic.unregisterPacketEndpoint(netProto, ep) 1855 } 1856 1857 // WritePacketToRemote writes a payload on the specified NIC using the provided 1858 // network protocol and remote link address. 1859 func (s *Stack) WritePacketToRemote(nicID tcpip.NICID, remote tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, payload buffer.Buffer) tcpip.Error { 1860 s.mu.Lock() 1861 nic, ok := s.nics[nicID] 1862 s.mu.Unlock() 1863 if !ok { 1864 return &tcpip.ErrUnknownDevice{} 1865 } 1866 pkt := NewPacketBuffer(PacketBufferOptions{ 1867 ReserveHeaderBytes: int(nic.MaxHeaderLength()), 1868 Payload: payload, 1869 }) 1870 defer pkt.DecRef() 1871 pkt.NetworkProtocolNumber = netProto 1872 return nic.WritePacketToRemote(remote, pkt) 1873 } 1874 1875 // WriteRawPacket writes data directly to the specified NIC without adding any 1876 // headers. 1877 func (s *Stack) WriteRawPacket(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber, payload buffer.Buffer) tcpip.Error { 1878 s.mu.RLock() 1879 nic, ok := s.nics[nicID] 1880 s.mu.RUnlock() 1881 if !ok { 1882 return &tcpip.ErrUnknownNICID{} 1883 } 1884 1885 pkt := NewPacketBuffer(PacketBufferOptions{ 1886 Payload: payload, 1887 }) 1888 defer pkt.DecRef() 1889 pkt.NetworkProtocolNumber = proto 1890 return nic.writeRawPacketWithLinkHeaderInPayload(pkt) 1891 } 1892 1893 // NetworkProtocolInstance returns the protocol instance in the stack for the 1894 // specified network protocol. This method is public for protocol implementers 1895 // and tests to use. 1896 func (s *Stack) NetworkProtocolInstance(num tcpip.NetworkProtocolNumber) NetworkProtocol { 1897 if p, ok := s.networkProtocols[num]; ok { 1898 return p 1899 } 1900 return nil 1901 } 1902 1903 // TransportProtocolInstance returns the protocol instance in the stack for the 1904 // specified transport protocol. This method is public for protocol implementers 1905 // and tests to use. 
1906 func (s *Stack) TransportProtocolInstance(num tcpip.TransportProtocolNumber) TransportProtocol { 1907 if pState, ok := s.transportProtocols[num]; ok { 1908 return pState.proto 1909 } 1910 return nil 1911 } 1912 1913 // AddTCPProbe installs a probe function that will be invoked on every segment 1914 // received by a given TCP endpoint. The probe function is passed a copy of the 1915 // TCP endpoint state before and after processing of the segment. 1916 // 1917 // NOTE: TCPProbe is added only to endpoints created after this call. Endpoints 1918 // created prior to this call will not call the probe function. 1919 // 1920 // Further, installing two different probes back to back can result in some 1921 // endpoints calling the first one and some the second one. There is no 1922 // guarantee provided on which probe will be invoked. Ideally this should only 1923 // be called once per stack. 1924 func (s *Stack) AddTCPProbe(probe TCPProbeFunc) { 1925 s.tcpProbeFunc.Store(probe) 1926 } 1927 1928 // GetTCPProbe returns the TCPProbeFunc if installed with AddTCPProbe, nil 1929 // otherwise. 1930 func (s *Stack) GetTCPProbe() TCPProbeFunc { 1931 p := s.tcpProbeFunc.Load() 1932 if p == nil { 1933 return nil 1934 } 1935 return p.(TCPProbeFunc) 1936 } 1937 1938 // RemoveTCPProbe removes an installed TCP probe. 1939 // 1940 // NOTE: This only ensures that endpoints created after this call do not 1941 // have a probe attached. Endpoints already created will continue to invoke 1942 // TCP probe. 1943 func (s *Stack) RemoveTCPProbe() { 1944 // This must be TCPProbeFunc(nil) because atomic.Value.Store(nil) panics. 1945 s.tcpProbeFunc.Store(TCPProbeFunc(nil)) 1946 } 1947 1948 // JoinGroup joins the given multicast group on the given NIC. 
1949 func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error { 1950 s.mu.RLock() 1951 defer s.mu.RUnlock() 1952 1953 if nic, ok := s.nics[nicID]; ok { 1954 return nic.joinGroup(protocol, multicastAddr) 1955 } 1956 return &tcpip.ErrUnknownNICID{} 1957 } 1958 1959 // LeaveGroup leaves the given multicast group on the given NIC. 1960 func (s *Stack) LeaveGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error { 1961 s.mu.RLock() 1962 defer s.mu.RUnlock() 1963 1964 if nic, ok := s.nics[nicID]; ok { 1965 return nic.leaveGroup(protocol, multicastAddr) 1966 } 1967 return &tcpip.ErrUnknownNICID{} 1968 } 1969 1970 // IsInGroup returns true if the NIC with ID nicID has joined the multicast 1971 // group multicastAddr. 1972 func (s *Stack) IsInGroup(nicID tcpip.NICID, multicastAddr tcpip.Address) (bool, tcpip.Error) { 1973 s.mu.RLock() 1974 defer s.mu.RUnlock() 1975 1976 if nic, ok := s.nics[nicID]; ok { 1977 return nic.isInGroup(multicastAddr), nil 1978 } 1979 return false, &tcpip.ErrUnknownNICID{} 1980 } 1981 1982 // IPTables returns the stack's iptables. 1983 func (s *Stack) IPTables() *IPTables { 1984 return s.tables 1985 } 1986 1987 // ICMPLimit returns the maximum number of ICMP messages that can be sent 1988 // in one second. 1989 func (s *Stack) ICMPLimit() rate.Limit { 1990 return s.icmpRateLimiter.Limit() 1991 } 1992 1993 // SetICMPLimit sets the maximum number of ICMP messages that be sent 1994 // in one second. 1995 func (s *Stack) SetICMPLimit(newLimit rate.Limit) { 1996 s.icmpRateLimiter.SetLimit(newLimit) 1997 } 1998 1999 // ICMPBurst returns the maximum number of ICMP messages that can be sent 2000 // in a single burst. 2001 func (s *Stack) ICMPBurst() int { 2002 return s.icmpRateLimiter.Burst() 2003 } 2004 2005 // SetICMPBurst sets the maximum number of ICMP messages that can be sent 2006 // in a single burst. 
2007 func (s *Stack) SetICMPBurst(burst int) { 2008 s.icmpRateLimiter.SetBurst(burst) 2009 } 2010 2011 // AllowICMPMessage returns true if we the rate limiter allows at least one 2012 // ICMP message to be sent at this instant. 2013 func (s *Stack) AllowICMPMessage() bool { 2014 return s.icmpRateLimiter.Allow() 2015 } 2016 2017 // GetNetworkEndpoint returns the NetworkEndpoint with the specified protocol 2018 // number installed on the specified NIC. 2019 func (s *Stack) GetNetworkEndpoint(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NetworkEndpoint, tcpip.Error) { 2020 s.mu.Lock() 2021 defer s.mu.Unlock() 2022 2023 nic, ok := s.nics[nicID] 2024 if !ok { 2025 return nil, &tcpip.ErrUnknownNICID{} 2026 } 2027 2028 return nic.getNetworkEndpoint(proto), nil 2029 } 2030 2031 // NUDConfigurations gets the per-interface NUD configurations. 2032 func (s *Stack) NUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NUDConfigurations, tcpip.Error) { 2033 s.mu.RLock() 2034 nic, ok := s.nics[id] 2035 s.mu.RUnlock() 2036 2037 if !ok { 2038 return NUDConfigurations{}, &tcpip.ErrUnknownNICID{} 2039 } 2040 2041 return nic.nudConfigs(proto) 2042 } 2043 2044 // SetNUDConfigurations sets the per-interface NUD configurations. 2045 // 2046 // Note, if c contains invalid NUD configuration values, it will be fixed to 2047 // use default values for the erroneous values. 2048 func (s *Stack) SetNUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber, c NUDConfigurations) tcpip.Error { 2049 s.mu.RLock() 2050 nic, ok := s.nics[id] 2051 s.mu.RUnlock() 2052 2053 if !ok { 2054 return &tcpip.ErrUnknownNICID{} 2055 } 2056 2057 return nic.setNUDConfigs(proto, c) 2058 } 2059 2060 // Seed returns a 32 bit value that can be used as a seed value. 2061 // 2062 // NOTE: The seed is generated once during stack initialization only. 
2063 func (s *Stack) Seed() uint32 { 2064 return s.seed 2065 } 2066 2067 // Rand returns a reference to a pseudo random generator that can be used 2068 // to generate random numbers as required. 2069 func (s *Stack) Rand() *rand.Rand { 2070 return s.randomGenerator 2071 } 2072 2073 // SecureRNG returns the stack's cryptographically secure random number 2074 // generator. 2075 func (s *Stack) SecureRNG() io.Reader { 2076 return s.secureRNG 2077 } 2078 2079 // FindNICNameFromID returns the name of the NIC for the given NICID. 2080 func (s *Stack) FindNICNameFromID(id tcpip.NICID) string { 2081 s.mu.RLock() 2082 defer s.mu.RUnlock() 2083 2084 nic, ok := s.nics[id] 2085 if !ok { 2086 return "" 2087 } 2088 2089 return nic.Name() 2090 } 2091 2092 // ParseResult indicates the result of a parsing attempt. 2093 type ParseResult int 2094 2095 const ( 2096 // ParsedOK indicates that a packet was successfully parsed. 2097 ParsedOK ParseResult = iota 2098 2099 // UnknownTransportProtocol indicates that the transport protocol is unknown. 2100 UnknownTransportProtocol 2101 2102 // TransportLayerParseError indicates that the transport packet was not 2103 // successfully parsed. 2104 TransportLayerParseError 2105 ) 2106 2107 // ParsePacketBufferTransport parses the provided packet buffer's transport 2108 // header. 2109 func (s *Stack) ParsePacketBufferTransport(protocol tcpip.TransportProtocolNumber, pkt PacketBufferPtr) ParseResult { 2110 pkt.TransportProtocolNumber = protocol 2111 // Parse the transport header if present. 2112 state, ok := s.transportProtocols[protocol] 2113 if !ok { 2114 return UnknownTransportProtocol 2115 } 2116 2117 if !state.proto.Parse(pkt) { 2118 return TransportLayerParseError 2119 } 2120 2121 return ParsedOK 2122 } 2123 2124 // networkProtocolNumbers returns the network protocol numbers the stack is 2125 // configured with. 
2126 func (s *Stack) networkProtocolNumbers() []tcpip.NetworkProtocolNumber { 2127 protos := make([]tcpip.NetworkProtocolNumber, 0, len(s.networkProtocols)) 2128 for p := range s.networkProtocols { 2129 protos = append(protos, p) 2130 } 2131 return protos 2132 } 2133 2134 func isSubnetBroadcastOnNIC(nic *nic, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool { 2135 addressEndpoint := nic.getAddressOrCreateTempInner(protocol, addr, false /* createTemp */, NeverPrimaryEndpoint) 2136 if addressEndpoint == nil { 2137 return false 2138 } 2139 2140 subnet := addressEndpoint.Subnet() 2141 addressEndpoint.DecRef() 2142 return subnet.IsBroadcast(addr) 2143 } 2144 2145 // IsSubnetBroadcast returns true if the provided address is a subnet-local 2146 // broadcast address on the specified NIC and protocol. 2147 // 2148 // Returns false if the NIC is unknown or if the protocol is unknown or does 2149 // not support addressing. 2150 // 2151 // If the NIC is not specified, the stack will check all NICs. 2152 func (s *Stack) IsSubnetBroadcast(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool { 2153 s.mu.RLock() 2154 defer s.mu.RUnlock() 2155 2156 if nicID != 0 { 2157 nic, ok := s.nics[nicID] 2158 if !ok { 2159 return false 2160 } 2161 2162 return isSubnetBroadcastOnNIC(nic, protocol, addr) 2163 } 2164 2165 for _, nic := range s.nics { 2166 if isSubnetBroadcastOnNIC(nic, protocol, addr) { 2167 return true 2168 } 2169 } 2170 2171 return false 2172 } 2173 2174 // PacketEndpointWriteSupported returns true iff packet endpoints support write 2175 // operations. 2176 func (s *Stack) PacketEndpointWriteSupported() bool { 2177 return s.packetEndpointWriteSupported 2178 }