gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/tcpip/stack/stack.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package stack provides the glue between networking protocols and the 16 // consumers of the networking stack. 17 // 18 // For consumers, the only function of interest is New(), everything else is 19 // provided by the tcpip/public package. 20 package stack 21 22 import ( 23 "encoding/binary" 24 "fmt" 25 "io" 26 "math/rand" 27 "sync/atomic" 28 "time" 29 30 "golang.org/x/time/rate" 31 "gvisor.dev/gvisor/pkg/atomicbitops" 32 "gvisor.dev/gvisor/pkg/buffer" 33 "gvisor.dev/gvisor/pkg/log" 34 cryptorand "gvisor.dev/gvisor/pkg/rand" 35 "gvisor.dev/gvisor/pkg/tcpip" 36 "gvisor.dev/gvisor/pkg/tcpip/header" 37 "gvisor.dev/gvisor/pkg/tcpip/ports" 38 "gvisor.dev/gvisor/pkg/waiter" 39 ) 40 41 const ( 42 // DefaultTOS is the default type of service value for network endpoints. 43 DefaultTOS = 0 44 ) 45 46 type transportProtocolState struct { 47 proto TransportProtocol 48 defaultHandler func(id TransportEndpointID, pkt *PacketBuffer) bool 49 } 50 51 // RestoredEndpoint is an endpoint that needs to be restored. 52 type RestoredEndpoint interface { 53 // Restore restores an endpoint. This can be used to restart background 54 // workers such as protocol goroutines. 
This must be called after all 55 // indirect dependencies of the endpoint has been restored, which 56 // generally implies at the end of the restore process. 57 Restore(*Stack) 58 } 59 60 // ResumableEndpoint is an endpoint that needs to be resumed after save. 61 type ResumableEndpoint interface { 62 // Resume resumes an endpoint. 63 Resume() 64 } 65 66 // uniqueIDGenerator is a default unique ID generator. 67 type uniqueIDGenerator atomicbitops.Uint64 68 69 func (u *uniqueIDGenerator) UniqueID() uint64 { 70 return ((*atomicbitops.Uint64)(u)).Add(1) 71 } 72 73 var netRawMissingLogger = log.BasicRateLimitedLogger(time.Minute) 74 75 // Stack is a networking stack, with all supported protocols, NICs, and route 76 // table. 77 // 78 // LOCK ORDERING: mu > routeMu. 79 type Stack struct { 80 transportProtocols map[tcpip.TransportProtocolNumber]*transportProtocolState 81 networkProtocols map[tcpip.NetworkProtocolNumber]NetworkProtocol 82 83 // rawFactory creates raw endpoints. If nil, raw endpoints are 84 // disabled. It is set during Stack creation and is immutable. 85 rawFactory RawFactory 86 packetEndpointWriteSupported bool 87 88 demux *transportDemuxer 89 90 stats tcpip.Stats 91 92 // routeMu protects annotated fields below. 93 routeMu routeStackRWMutex 94 95 // +checklocks:routeMu 96 routeTable []tcpip.Route 97 98 mu stackRWMutex 99 // +checklocks:mu 100 nics map[tcpip.NICID]*nic 101 defaultForwardingEnabled map[tcpip.NetworkProtocolNumber]struct{} 102 103 // cleanupEndpointsMu protects cleanupEndpoints. 104 cleanupEndpointsMu cleanupEndpointsMutex 105 // +checklocks:cleanupEndpointsMu 106 cleanupEndpoints map[TransportEndpoint]struct{} 107 108 *ports.PortManager 109 110 // If not nil, then any new endpoints will have this probe function 111 // invoked everytime they receive a TCP segment. 112 tcpProbeFunc atomic.Value // TCPProbeFunc 113 114 // clock is used to generate user-visible times. 
115 clock tcpip.Clock 116 117 // handleLocal allows non-loopback interfaces to loop packets. 118 handleLocal bool 119 120 // tables are the iptables packet filtering and manipulation rules. 121 // TODO(gvisor.dev/issue/4595): S/R this field. 122 tables *IPTables 123 124 // restoredEndpoints is a list of endpoints that need to be restored if the 125 // stack is being restored. 126 restoredEndpoints []RestoredEndpoint 127 128 // resumableEndpoints is a list of endpoints that need to be resumed 129 // after save. 130 resumableEndpoints []ResumableEndpoint 131 132 // icmpRateLimiter is a global rate limiter for all ICMP messages generated 133 // by the stack. 134 icmpRateLimiter *ICMPRateLimiter 135 136 // seed is a one-time random value initialized at stack startup. 137 // 138 // TODO(gvisor.dev/issue/940): S/R this field. 139 seed uint32 140 141 // nudConfigs is the default NUD configurations used by interfaces. 142 nudConfigs NUDConfigurations 143 144 // nudDisp is the NUD event dispatcher that is used to send the netstack 145 // integrator NUD related events. 146 nudDisp NUDDispatcher 147 148 // uniqueIDGenerator is a generator of unique identifiers. 149 uniqueIDGenerator UniqueID 150 151 // randomGenerator is an injectable pseudo random generator that can be 152 // used when a random number is required. It must not be used in 153 // security-sensitive contexts. 154 insecureRNG *rand.Rand 155 156 // secureRNG is a cryptographically secure random number generator. 157 secureRNG cryptorand.RNG 158 159 // sendBufferSize holds the min/default/max send buffer sizes for 160 // endpoints other than TCP. 161 sendBufferSize tcpip.SendBufferSizeOption 162 163 // receiveBufferSize holds the min/default/max receive buffer sizes for 164 // endpoints other than TCP. 
165 receiveBufferSize tcpip.ReceiveBufferSizeOption 166 167 // tcpInvalidRateLimit is the maximal rate for sending duplicate 168 // acknowledgements in response to incoming TCP packets that are for an existing 169 // connection but that are invalid due to any of the following reasons: 170 // 171 // a) out-of-window sequence number. 172 // b) out-of-window acknowledgement number. 173 // c) PAWS check failure (when implemented). 174 // 175 // This is required to prevent potential ACK loops. 176 // Setting this to 0 will disable all rate limiting. 177 tcpInvalidRateLimit time.Duration 178 179 // tsOffsetSecret is the secret key for generating timestamp offsets 180 // initialized at stack startup. 181 tsOffsetSecret uint32 182 } 183 184 // UniqueID is an abstract generator of unique identifiers. 185 type UniqueID interface { 186 UniqueID() uint64 187 } 188 189 // NetworkProtocolFactory instantiates a network protocol. 190 // 191 // NetworkProtocolFactory must not attempt to modify the stack, it may only 192 // query the stack. 193 type NetworkProtocolFactory func(*Stack) NetworkProtocol 194 195 // TransportProtocolFactory instantiates a transport protocol. 196 // 197 // TransportProtocolFactory must not attempt to modify the stack, it may only 198 // query the stack. 199 type TransportProtocolFactory func(*Stack) TransportProtocol 200 201 // Options contains optional Stack configuration. 202 type Options struct { 203 // NetworkProtocols lists the network protocols to enable. 204 NetworkProtocols []NetworkProtocolFactory 205 206 // TransportProtocols lists the transport protocols to enable. 207 TransportProtocols []TransportProtocolFactory 208 209 // Clock is an optional clock used for timekeeping. 210 // 211 // If Clock is nil, tcpip.NewStdClock() will be used. 212 Clock tcpip.Clock 213 214 // Stats are optional statistic counters. 
215 Stats tcpip.Stats 216 217 // HandleLocal indicates whether packets destined to their source 218 // should be handled by the stack internally (true) or outside the 219 // stack (false). 220 HandleLocal bool 221 222 // UniqueID is an optional generator of unique identifiers. 223 UniqueID UniqueID 224 225 // NUDConfigs is the default NUD configurations used by interfaces. 226 NUDConfigs NUDConfigurations 227 228 // NUDDisp is the NUD event dispatcher that an integrator can provide to 229 // receive NUD related events. 230 NUDDisp NUDDispatcher 231 232 // RawFactory produces raw endpoints. Raw endpoints are enabled only if 233 // this is non-nil. 234 RawFactory RawFactory 235 236 // AllowPacketEndpointWrite determines if packet endpoints support write 237 // operations. 238 AllowPacketEndpointWrite bool 239 240 // RandSource is an optional source to use to generate random 241 // numbers. If omitted it defaults to a Source seeded by the data 242 // returned by the stack secure RNG. 243 // 244 // RandSource must be thread-safe. 245 RandSource rand.Source 246 247 // IPTables are the initial iptables rules. If nil, DefaultIPTables will be 248 // used to construct the initial iptables rules. 249 // all traffic. 250 IPTables *IPTables 251 252 // DefaultIPTables is an optional iptables rules constructor that is called 253 // if IPTables is nil. If both fields are nil, iptables will allow all 254 // traffic. 255 DefaultIPTables func(clock tcpip.Clock, rand *rand.Rand) *IPTables 256 257 // SecureRNG is a cryptographically secure random number generator. 258 SecureRNG io.Reader 259 } 260 261 // TransportEndpointInfo holds useful information about a transport endpoint 262 // which can be queried by monitoring tools. 263 // 264 // +stateify savable 265 type TransportEndpointInfo struct { 266 // The following fields are initialized at creation time and are 267 // immutable. 
268 269 NetProto tcpip.NetworkProtocolNumber 270 TransProto tcpip.TransportProtocolNumber 271 272 // The following fields are protected by endpoint mu. 273 274 ID TransportEndpointID 275 // BindNICID and bindAddr are set via calls to Bind(). They are used to 276 // reject attempts to send data or connect via a different NIC or 277 // address 278 BindNICID tcpip.NICID 279 BindAddr tcpip.Address 280 // RegisterNICID is the default NICID registered as a side-effect of 281 // connect or datagram write. 282 RegisterNICID tcpip.NICID 283 } 284 285 // AddrNetProtoLocked unwraps the specified address if it is a V4-mapped V6 286 // address and returns the network protocol number to be used to communicate 287 // with the specified address. It returns an error if the passed address is 288 // incompatible with the receiver. 289 // 290 // Preconditon: the parent endpoint mu must be held while calling this method. 291 func (t *TransportEndpointInfo) AddrNetProtoLocked(addr tcpip.FullAddress, v6only bool) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, tcpip.Error) { 292 netProto := t.NetProto 293 switch addr.Addr.BitLen() { 294 case header.IPv4AddressSizeBits: 295 netProto = header.IPv4ProtocolNumber 296 case header.IPv6AddressSizeBits: 297 if header.IsV4MappedAddress(addr.Addr) { 298 netProto = header.IPv4ProtocolNumber 299 addr.Addr = tcpip.AddrFrom4Slice(addr.Addr.AsSlice()[header.IPv6AddressSize-header.IPv4AddressSize:]) 300 if addr.Addr == header.IPv4Any { 301 addr.Addr = tcpip.Address{} 302 } 303 } 304 } 305 306 switch t.ID.LocalAddress.BitLen() { 307 case header.IPv4AddressSizeBits: 308 if addr.Addr.BitLen() == header.IPv6AddressSizeBits { 309 return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{} 310 } 311 case header.IPv6AddressSizeBits: 312 if addr.Addr.BitLen() == header.IPv4AddressSizeBits { 313 return tcpip.FullAddress{}, 0, &tcpip.ErrNetworkUnreachable{} 314 } 315 } 316 317 switch { 318 case netProto == t.NetProto: 319 case netProto == 
header.IPv4ProtocolNumber && t.NetProto == header.IPv6ProtocolNumber: 320 if v6only { 321 return tcpip.FullAddress{}, 0, &tcpip.ErrHostUnreachable{} 322 } 323 default: 324 return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{} 325 } 326 327 return addr, netProto, nil 328 } 329 330 // IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo 331 // marker interface. 332 func (*TransportEndpointInfo) IsEndpointInfo() {} 333 334 // New allocates a new networking stack with only the requested networking and 335 // transport protocols configured with default options. 336 // 337 // Note, NDPConfigurations will be fixed before being used by the Stack. That 338 // is, if an invalid value was provided, it will be reset to the default value. 339 // 340 // Protocol options can be changed by calling the 341 // SetNetworkProtocolOption/SetTransportProtocolOption methods provided by the 342 // stack. Please refer to individual protocol implementations as to what options 343 // are supported. 344 func New(opts Options) *Stack { 345 clock := opts.Clock 346 if clock == nil { 347 clock = tcpip.NewStdClock() 348 } 349 350 if opts.UniqueID == nil { 351 opts.UniqueID = new(uniqueIDGenerator) 352 } 353 354 if opts.SecureRNG == nil { 355 opts.SecureRNG = cryptorand.Reader 356 } 357 secureRNG := cryptorand.RNGFrom(opts.SecureRNG) 358 359 randSrc := opts.RandSource 360 if randSrc == nil { 361 var v int64 362 if err := binary.Read(opts.SecureRNG, binary.LittleEndian, &v); err != nil { 363 panic(err) 364 } 365 // Source provided by rand.NewSource is not thread-safe so 366 // we wrap it in a simple thread-safe version. 
367 randSrc = &lockedRandomSource{src: rand.NewSource(v)} 368 } 369 insecureRNG := rand.New(randSrc) 370 371 if opts.IPTables == nil { 372 if opts.DefaultIPTables == nil { 373 opts.DefaultIPTables = DefaultTables 374 } 375 opts.IPTables = opts.DefaultIPTables(clock, insecureRNG) 376 } 377 378 opts.NUDConfigs.resetInvalidFields() 379 380 s := &Stack{ 381 transportProtocols: make(map[tcpip.TransportProtocolNumber]*transportProtocolState), 382 networkProtocols: make(map[tcpip.NetworkProtocolNumber]NetworkProtocol), 383 nics: make(map[tcpip.NICID]*nic), 384 packetEndpointWriteSupported: opts.AllowPacketEndpointWrite, 385 defaultForwardingEnabled: make(map[tcpip.NetworkProtocolNumber]struct{}), 386 cleanupEndpoints: make(map[TransportEndpoint]struct{}), 387 PortManager: ports.NewPortManager(), 388 clock: clock, 389 stats: opts.Stats.FillIn(), 390 handleLocal: opts.HandleLocal, 391 tables: opts.IPTables, 392 icmpRateLimiter: NewICMPRateLimiter(clock), 393 seed: secureRNG.Uint32(), 394 nudConfigs: opts.NUDConfigs, 395 uniqueIDGenerator: opts.UniqueID, 396 nudDisp: opts.NUDDisp, 397 insecureRNG: insecureRNG, 398 secureRNG: secureRNG, 399 sendBufferSize: tcpip.SendBufferSizeOption{ 400 Min: MinBufferSize, 401 Default: DefaultBufferSize, 402 Max: DefaultMaxBufferSize, 403 }, 404 receiveBufferSize: tcpip.ReceiveBufferSizeOption{ 405 Min: MinBufferSize, 406 Default: DefaultBufferSize, 407 Max: DefaultMaxBufferSize, 408 }, 409 tcpInvalidRateLimit: defaultTCPInvalidRateLimit, 410 tsOffsetSecret: secureRNG.Uint32(), 411 } 412 413 // Add specified network protocols. 414 for _, netProtoFactory := range opts.NetworkProtocols { 415 netProto := netProtoFactory(s) 416 s.networkProtocols[netProto.Number()] = netProto 417 } 418 419 // Add specified transport protocols. 
420 for _, transProtoFactory := range opts.TransportProtocols { 421 transProto := transProtoFactory(s) 422 s.transportProtocols[transProto.Number()] = &transportProtocolState{ 423 proto: transProto, 424 } 425 } 426 427 // Add the factory for raw endpoints, if present. 428 s.rawFactory = opts.RawFactory 429 430 // Create the global transport demuxer. 431 s.demux = newTransportDemuxer(s) 432 433 return s 434 } 435 436 // UniqueID returns a unique identifier. 437 func (s *Stack) UniqueID() uint64 { 438 return s.uniqueIDGenerator.UniqueID() 439 } 440 441 // SetNetworkProtocolOption allows configuring individual protocol level 442 // options. This method returns an error if the protocol is not supported or 443 // option is not supported by the protocol implementation or the provided value 444 // is incorrect. 445 func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.SettableNetworkProtocolOption) tcpip.Error { 446 netProto, ok := s.networkProtocols[network] 447 if !ok { 448 return &tcpip.ErrUnknownProtocol{} 449 } 450 return netProto.SetOption(option) 451 } 452 453 // NetworkProtocolOption allows retrieving individual protocol level option 454 // values. This method returns an error if the protocol is not supported or 455 // option is not supported by the protocol implementation. E.g.: 456 // 457 // var v ipv4.MyOption 458 // err := s.NetworkProtocolOption(tcpip.IPv4ProtocolNumber, &v) 459 // if err != nil { 460 // ... 461 // } 462 func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.GettableNetworkProtocolOption) tcpip.Error { 463 netProto, ok := s.networkProtocols[network] 464 if !ok { 465 return &tcpip.ErrUnknownProtocol{} 466 } 467 return netProto.Option(option) 468 } 469 470 // SetTransportProtocolOption allows configuring individual protocol level 471 // options. 
This method returns an error if the protocol is not supported or 472 // option is not supported by the protocol implementation or the provided value 473 // is incorrect. 474 func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.SettableTransportProtocolOption) tcpip.Error { 475 transProtoState, ok := s.transportProtocols[transport] 476 if !ok { 477 return &tcpip.ErrUnknownProtocol{} 478 } 479 return transProtoState.proto.SetOption(option) 480 } 481 482 // TransportProtocolOption allows retrieving individual protocol level option 483 // values. This method returns an error if the protocol is not supported or 484 // option is not supported by the protocol implementation. 485 // 486 // var v tcp.SACKEnabled 487 // if err := s.TransportProtocolOption(tcpip.TCPProtocolNumber, &v); err != nil { 488 // ... 489 // } 490 func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.GettableTransportProtocolOption) tcpip.Error { 491 transProtoState, ok := s.transportProtocols[transport] 492 if !ok { 493 return &tcpip.ErrUnknownProtocol{} 494 } 495 return transProtoState.proto.Option(option) 496 } 497 498 // SendBufSizeProto is a protocol that can return its send buffer size. 499 type SendBufSizeProto interface { 500 SendBufferSize() tcpip.TCPSendBufferSizeRangeOption 501 } 502 503 // TCPSendBufferLimits returns the TCP send buffer size limit. 504 func (s *Stack) TCPSendBufferLimits() tcpip.TCPSendBufferSizeRangeOption { 505 return s.transportProtocols[header.TCPProtocolNumber].proto.(SendBufSizeProto).SendBufferSize() 506 } 507 508 // SetTransportProtocolHandler sets the per-stack default handler for the given 509 // protocol. 510 // 511 // It must be called only during initialization of the stack. Changing it as the 512 // stack is operating is not supported. 
513 func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(TransportEndpointID, *PacketBuffer) bool) { 514 state := s.transportProtocols[p] 515 if state != nil { 516 state.defaultHandler = h 517 } 518 } 519 520 // Clock returns the Stack's clock for retrieving the current time and 521 // scheduling work. 522 func (s *Stack) Clock() tcpip.Clock { 523 return s.clock 524 } 525 526 // Stats returns a mutable copy of the current stats. 527 // 528 // This is not generally exported via the public interface, but is available 529 // internally. 530 func (s *Stack) Stats() tcpip.Stats { 531 return s.stats 532 } 533 534 // SetNICForwarding enables or disables packet forwarding on the specified NIC 535 // for the passed protocol. 536 // 537 // Returns the previous configuration on the NIC. 538 func (s *Stack) SetNICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) { 539 s.mu.RLock() 540 defer s.mu.RUnlock() 541 542 nic, ok := s.nics[id] 543 if !ok { 544 return false, &tcpip.ErrUnknownNICID{} 545 } 546 547 return nic.setForwarding(protocol, enable) 548 } 549 550 // NICForwarding returns the forwarding configuration for the specified NIC. 551 func (s *Stack) NICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) { 552 s.mu.RLock() 553 defer s.mu.RUnlock() 554 555 nic, ok := s.nics[id] 556 if !ok { 557 return false, &tcpip.ErrUnknownNICID{} 558 } 559 560 return nic.forwarding(protocol) 561 } 562 563 // SetForwardingDefaultAndAllNICs sets packet forwarding for all NICs for the 564 // passed protocol and sets the default setting for newly created NICs. 
565 func (s *Stack) SetForwardingDefaultAndAllNICs(protocol tcpip.NetworkProtocolNumber, enable bool) tcpip.Error { 566 s.mu.Lock() 567 defer s.mu.Unlock() 568 569 doneOnce := false 570 for id, nic := range s.nics { 571 if _, err := nic.setForwarding(protocol, enable); err != nil { 572 // Expect forwarding to be settable on all interfaces if it was set on 573 // one. 574 if doneOnce { 575 panic(fmt.Sprintf("nic(id=%d).setForwarding(%d, %t): %s", id, protocol, enable, err)) 576 } 577 578 return err 579 } 580 581 doneOnce = true 582 } 583 584 if enable { 585 s.defaultForwardingEnabled[protocol] = struct{}{} 586 } else { 587 delete(s.defaultForwardingEnabled, protocol) 588 } 589 590 return nil 591 } 592 593 // AddMulticastRoute adds a multicast route to be used for the specified 594 // addresses and protocol. 595 func (s *Stack) AddMulticastRoute(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination, route MulticastRoute) tcpip.Error { 596 netProto, ok := s.networkProtocols[protocol] 597 if !ok { 598 return &tcpip.ErrUnknownProtocol{} 599 } 600 601 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 602 if !ok { 603 return &tcpip.ErrNotSupported{} 604 } 605 606 return forwardingNetProto.AddMulticastRoute(addresses, route) 607 } 608 609 // RemoveMulticastRoute removes a multicast route that matches the specified 610 // addresses and protocol. 
611 func (s *Stack) RemoveMulticastRoute(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination) tcpip.Error { 612 netProto, ok := s.networkProtocols[protocol] 613 if !ok { 614 return &tcpip.ErrUnknownProtocol{} 615 } 616 617 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 618 if !ok { 619 return &tcpip.ErrNotSupported{} 620 } 621 622 return forwardingNetProto.RemoveMulticastRoute(addresses) 623 } 624 625 // MulticastRouteLastUsedTime returns a monotonic timestamp that represents the 626 // last time that the route that matches the provided addresses and protocol 627 // was used or updated. 628 func (s *Stack) MulticastRouteLastUsedTime(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination) (tcpip.MonotonicTime, tcpip.Error) { 629 netProto, ok := s.networkProtocols[protocol] 630 if !ok { 631 return tcpip.MonotonicTime{}, &tcpip.ErrUnknownProtocol{} 632 } 633 634 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 635 if !ok { 636 return tcpip.MonotonicTime{}, &tcpip.ErrNotSupported{} 637 } 638 639 return forwardingNetProto.MulticastRouteLastUsedTime(addresses) 640 } 641 642 // EnableMulticastForwardingForProtocol enables multicast forwarding for the 643 // provided protocol. 644 // 645 // Returns true if forwarding was already enabled on the protocol. 646 // Additionally, returns an error if: 647 // 648 // - The protocol is not found. 649 // - The protocol doesn't support multicast forwarding. 650 // - The multicast forwarding event dispatcher is nil. 651 // 652 // If successful, future multicast forwarding events will be sent to the 653 // provided event dispatcher. 
654 func (s *Stack) EnableMulticastForwardingForProtocol(protocol tcpip.NetworkProtocolNumber, disp MulticastForwardingEventDispatcher) (bool, tcpip.Error) { 655 netProto, ok := s.networkProtocols[protocol] 656 if !ok { 657 return false, &tcpip.ErrUnknownProtocol{} 658 } 659 660 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 661 if !ok { 662 return false, &tcpip.ErrNotSupported{} 663 } 664 665 return forwardingNetProto.EnableMulticastForwarding(disp) 666 } 667 668 // DisableMulticastForwardingForProtocol disables multicast forwarding for the 669 // provided protocol. 670 // 671 // Returns an error if the provided protocol is not found or if it does not 672 // support multicast forwarding. 673 func (s *Stack) DisableMulticastForwardingForProtocol(protocol tcpip.NetworkProtocolNumber) tcpip.Error { 674 netProto, ok := s.networkProtocols[protocol] 675 if !ok { 676 return &tcpip.ErrUnknownProtocol{} 677 } 678 679 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 680 if !ok { 681 return &tcpip.ErrNotSupported{} 682 } 683 684 forwardingNetProto.DisableMulticastForwarding() 685 return nil 686 } 687 688 // SetNICMulticastForwarding enables or disables multicast packet forwarding on 689 // the specified NIC for the passed protocol. 690 // 691 // Returns the previous configuration on the NIC. 692 // 693 // TODO(https://gvisor.dev/issue/7338): Implement support for multicast 694 // forwarding. Currently, setting this value is a no-op and is not ready for 695 // use. 696 func (s *Stack) SetNICMulticastForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) { 697 s.mu.RLock() 698 defer s.mu.RUnlock() 699 700 nic, ok := s.nics[id] 701 if !ok { 702 return false, &tcpip.ErrUnknownNICID{} 703 } 704 705 return nic.setMulticastForwarding(protocol, enable) 706 } 707 708 // NICMulticastForwarding returns the multicast forwarding configuration for 709 // the specified NIC. 
710 func (s *Stack) NICMulticastForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) { 711 s.mu.RLock() 712 defer s.mu.RUnlock() 713 714 nic, ok := s.nics[id] 715 if !ok { 716 return false, &tcpip.ErrUnknownNICID{} 717 } 718 719 return nic.multicastForwarding(protocol) 720 } 721 722 // PortRange returns the UDP and TCP inclusive range of ephemeral ports used in 723 // both IPv4 and IPv6. 724 func (s *Stack) PortRange() (uint16, uint16) { 725 return s.PortManager.PortRange() 726 } 727 728 // SetPortRange sets the UDP and TCP IPv4 and IPv6 ephemeral port range 729 // (inclusive). 730 func (s *Stack) SetPortRange(start uint16, end uint16) tcpip.Error { 731 return s.PortManager.SetPortRange(start, end) 732 } 733 734 // SetRouteTable assigns the route table to be used by this stack. It 735 // specifies which NIC to use for given destination address ranges. 736 // 737 // This method takes ownership of the table. 738 func (s *Stack) SetRouteTable(table []tcpip.Route) { 739 s.routeMu.Lock() 740 defer s.routeMu.Unlock() 741 s.routeTable = table 742 } 743 744 // GetRouteTable returns the route table which is currently in use. 745 func (s *Stack) GetRouteTable() []tcpip.Route { 746 s.routeMu.RLock() 747 defer s.routeMu.RUnlock() 748 return append([]tcpip.Route(nil), s.routeTable...) 749 } 750 751 // AddRoute appends a route to the route table. 752 func (s *Stack) AddRoute(route tcpip.Route) { 753 s.routeMu.Lock() 754 defer s.routeMu.Unlock() 755 s.routeTable = append(s.routeTable, route) 756 } 757 758 // RemoveRoutes removes matching routes from the route table. 
759 func (s *Stack) RemoveRoutes(match func(tcpip.Route) bool) { 760 s.routeMu.Lock() 761 defer s.routeMu.Unlock() 762 763 var filteredRoutes []tcpip.Route 764 for _, route := range s.routeTable { 765 if !match(route) { 766 filteredRoutes = append(filteredRoutes, route) 767 } 768 } 769 s.routeTable = filteredRoutes 770 } 771 772 // NewEndpoint creates a new transport layer endpoint of the given protocol. 773 func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { 774 t, ok := s.transportProtocols[transport] 775 if !ok { 776 return nil, &tcpip.ErrUnknownProtocol{} 777 } 778 779 return t.proto.NewEndpoint(network, waiterQueue) 780 } 781 782 // NewRawEndpoint creates a new raw transport layer endpoint of the given 783 // protocol. Raw endpoints receive all traffic for a given protocol regardless 784 // of address. 785 func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, tcpip.Error) { 786 if s.rawFactory == nil { 787 netRawMissingLogger.Infof("A process tried to create a raw socket, but --net-raw was not specified. Should runsc be run with --net-raw?") 788 return nil, &tcpip.ErrNotPermitted{} 789 } 790 791 if !associated { 792 return s.rawFactory.NewUnassociatedEndpoint(s, network, transport, waiterQueue) 793 } 794 795 t, ok := s.transportProtocols[transport] 796 if !ok { 797 return nil, &tcpip.ErrUnknownProtocol{} 798 } 799 800 return t.proto.NewRawEndpoint(network, waiterQueue) 801 } 802 803 // NewPacketEndpoint creates a new packet endpoint listening for the given 804 // netProto. 
805 func (s *Stack) NewPacketEndpoint(cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { 806 if s.rawFactory == nil { 807 return nil, &tcpip.ErrNotPermitted{} 808 } 809 810 return s.rawFactory.NewPacketEndpoint(s, cooked, netProto, waiterQueue) 811 } 812 813 // NICContext is an opaque pointer used to store client-supplied NIC metadata. 814 type NICContext any 815 816 // NICOptions specifies the configuration of a NIC as it is being created. 817 // The zero value creates an enabled, unnamed NIC. 818 type NICOptions struct { 819 // Name specifies the name of the NIC. 820 Name string 821 822 // Disabled specifies whether to avoid calling Attach on the passed 823 // LinkEndpoint. 824 Disabled bool 825 826 // Context specifies user-defined data that will be returned in stack.NICInfo 827 // for the NIC. Clients of this library can use it to add metadata that 828 // should be tracked alongside a NIC, to avoid having to keep a 829 // map[tcpip.NICID]metadata mirroring stack.Stack's nic map. 830 Context NICContext 831 832 // QDisc is the queue discipline to use for this NIC. 833 QDisc QueueingDiscipline 834 835 // DeliverLinkPackets specifies whether the NIC is responsible for 836 // delivering raw packets to packet sockets. 837 DeliverLinkPackets bool 838 } 839 840 // CreateNICWithOptions creates a NIC with the provided id, LinkEndpoint, and 841 // NICOptions. See the documentation on type NICOptions for details on how 842 // NICs can be configured. 843 // 844 // LinkEndpoint.Attach will be called to bind ep with a NetworkDispatcher. 845 func (s *Stack) CreateNICWithOptions(id tcpip.NICID, ep LinkEndpoint, opts NICOptions) tcpip.Error { 846 s.mu.Lock() 847 defer s.mu.Unlock() 848 849 if id == 0 { 850 return &tcpip.ErrInvalidNICID{} 851 } 852 // Make sure id is unique. 853 if _, ok := s.nics[id]; ok { 854 return &tcpip.ErrDuplicateNICID{} 855 } 856 857 // Make sure name is unique, unless unnamed. 
858 if opts.Name != "" { 859 for _, n := range s.nics { 860 if n.Name() == opts.Name { 861 return &tcpip.ErrDuplicateNICID{} 862 } 863 } 864 } 865 866 n := newNIC(s, id, ep, opts) 867 for proto := range s.defaultForwardingEnabled { 868 if _, err := n.setForwarding(proto, true); err != nil { 869 panic(fmt.Sprintf("newNIC(%d, ...).setForwarding(%d, true): %s", id, proto, err)) 870 } 871 } 872 s.nics[id] = n 873 if !opts.Disabled { 874 return n.enable() 875 } 876 877 return nil 878 } 879 880 // CreateNIC creates a NIC with the provided id and LinkEndpoint and calls 881 // LinkEndpoint.Attach to bind ep with a NetworkDispatcher. 882 func (s *Stack) CreateNIC(id tcpip.NICID, ep LinkEndpoint) tcpip.Error { 883 return s.CreateNICWithOptions(id, ep, NICOptions{}) 884 } 885 886 // GetLinkEndpointByName gets the link endpoint specified by name. 887 func (s *Stack) GetLinkEndpointByName(name string) LinkEndpoint { 888 s.mu.RLock() 889 defer s.mu.RUnlock() 890 for _, nic := range s.nics { 891 if nic.Name() == name { 892 linkEP, ok := nic.NetworkLinkEndpoint.(LinkEndpoint) 893 if !ok { 894 panic(fmt.Sprintf("unexpected NetworkLinkEndpoint(%#v) is not a LinkEndpoint", nic.NetworkLinkEndpoint)) 895 } 896 return linkEP 897 } 898 } 899 return nil 900 } 901 902 // EnableNIC enables the given NIC so that the link-layer endpoint can start 903 // delivering packets to it. 904 func (s *Stack) EnableNIC(id tcpip.NICID) tcpip.Error { 905 s.mu.RLock() 906 defer s.mu.RUnlock() 907 908 nic, ok := s.nics[id] 909 if !ok { 910 return &tcpip.ErrUnknownNICID{} 911 } 912 913 return nic.enable() 914 } 915 916 // DisableNIC disables the given NIC. 917 func (s *Stack) DisableNIC(id tcpip.NICID) tcpip.Error { 918 s.mu.RLock() 919 defer s.mu.RUnlock() 920 921 nic, ok := s.nics[id] 922 if !ok { 923 return &tcpip.ErrUnknownNICID{} 924 } 925 926 nic.disable() 927 return nil 928 } 929 930 // CheckNIC checks if a NIC is usable. 
931 func (s *Stack) CheckNIC(id tcpip.NICID) bool { 932 s.mu.RLock() 933 defer s.mu.RUnlock() 934 935 nic, ok := s.nics[id] 936 if !ok { 937 return false 938 } 939 940 return nic.Enabled() 941 } 942 943 // RemoveNIC removes NIC and all related routes from the network stack. 944 func (s *Stack) RemoveNIC(id tcpip.NICID) tcpip.Error { 945 s.mu.Lock() 946 defer s.mu.Unlock() 947 948 return s.removeNICLocked(id) 949 } 950 951 // removeNICLocked removes NIC and all related routes from the network stack. 952 // 953 // +checklocks:s.mu 954 func (s *Stack) removeNICLocked(id tcpip.NICID) tcpip.Error { 955 nic, ok := s.nics[id] 956 if !ok { 957 return &tcpip.ErrUnknownNICID{} 958 } 959 delete(s.nics, id) 960 961 // Remove routes in-place. n tracks the number of routes written. 962 s.routeMu.Lock() 963 n := 0 964 for _, r := range s.routeTable { 965 if r.NIC != id { 966 // Keep this route. 967 s.routeTable[n] = r 968 n++ 969 } 970 } 971 clear(s.routeTable[n:]) 972 s.routeTable = s.routeTable[:n] 973 s.routeMu.Unlock() 974 975 return nic.remove() 976 } 977 978 // NICInfo captures the name and addresses assigned to a NIC. 979 type NICInfo struct { 980 Name string 981 LinkAddress tcpip.LinkAddress 982 ProtocolAddresses []tcpip.ProtocolAddress 983 984 // Flags indicate the state of the NIC. 985 Flags NICStateFlags 986 987 // MTU is the maximum transmission unit. 988 MTU uint32 989 990 Stats tcpip.NICStats 991 992 // NetworkStats holds the stats of each NetworkEndpoint bound to the NIC. 993 NetworkStats map[tcpip.NetworkProtocolNumber]NetworkEndpointStats 994 995 // Context is user-supplied data optionally supplied in CreateNICWithOptions. 996 // See type NICOptions for more details. 997 Context NICContext 998 999 // ARPHardwareType holds the ARP Hardware type of the NIC. This is the 1000 // value sent in haType field of an ARP Request sent by this NIC and the 1001 // value expected in the haType field of an ARP response. 
1002 ARPHardwareType header.ARPHardwareType 1003 1004 // Forwarding holds the forwarding status for each network endpoint that 1005 // supports forwarding. 1006 Forwarding map[tcpip.NetworkProtocolNumber]bool 1007 1008 // MulticastForwarding holds the forwarding status for each network endpoint 1009 // that supports multicast forwarding. 1010 MulticastForwarding map[tcpip.NetworkProtocolNumber]bool 1011 } 1012 1013 // HasNIC returns true if the NICID is defined in the stack. 1014 func (s *Stack) HasNIC(id tcpip.NICID) bool { 1015 s.mu.RLock() 1016 _, ok := s.nics[id] 1017 s.mu.RUnlock() 1018 return ok 1019 } 1020 1021 // NICInfo returns a map of NICIDs to their associated information. 1022 func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo { 1023 s.mu.RLock() 1024 defer s.mu.RUnlock() 1025 1026 type forwardingFn func(tcpip.NetworkProtocolNumber) (bool, tcpip.Error) 1027 forwardingValue := func(forwardingFn forwardingFn, proto tcpip.NetworkProtocolNumber, nicID tcpip.NICID, fnName string) (forward bool, ok bool) { 1028 switch forwarding, err := forwardingFn(proto); err.(type) { 1029 case nil: 1030 return forwarding, true 1031 case *tcpip.ErrUnknownProtocol: 1032 panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nicID)) 1033 case *tcpip.ErrNotSupported: 1034 // Not all network protocols support forwarding. 1035 default: 1036 panic(fmt.Sprintf("nic(id=%d).%s(%d): %s", nicID, fnName, proto, err)) 1037 } 1038 return false, false 1039 } 1040 1041 nics := make(map[tcpip.NICID]NICInfo) 1042 for id, nic := range s.nics { 1043 flags := NICStateFlags{ 1044 Up: true, // Netstack interfaces are always up. 
1045 Running: nic.Enabled(), 1046 Promiscuous: nic.Promiscuous(), 1047 Loopback: nic.IsLoopback(), 1048 } 1049 1050 netStats := make(map[tcpip.NetworkProtocolNumber]NetworkEndpointStats) 1051 for proto, netEP := range nic.networkEndpoints { 1052 netStats[proto] = netEP.Stats() 1053 } 1054 1055 info := NICInfo{ 1056 Name: nic.name, 1057 LinkAddress: nic.NetworkLinkEndpoint.LinkAddress(), 1058 ProtocolAddresses: nic.primaryAddresses(), 1059 Flags: flags, 1060 MTU: nic.NetworkLinkEndpoint.MTU(), 1061 Stats: nic.stats.local, 1062 NetworkStats: netStats, 1063 Context: nic.context, 1064 ARPHardwareType: nic.NetworkLinkEndpoint.ARPHardwareType(), 1065 Forwarding: make(map[tcpip.NetworkProtocolNumber]bool), 1066 MulticastForwarding: make(map[tcpip.NetworkProtocolNumber]bool), 1067 } 1068 1069 for proto := range s.networkProtocols { 1070 if forwarding, ok := forwardingValue(nic.forwarding, proto, id, "forwarding"); ok { 1071 info.Forwarding[proto] = forwarding 1072 } 1073 1074 if multicastForwarding, ok := forwardingValue(nic.multicastForwarding, proto, id, "multicastForwarding"); ok { 1075 info.MulticastForwarding[proto] = multicastForwarding 1076 } 1077 } 1078 1079 nics[id] = info 1080 } 1081 return nics 1082 } 1083 1084 // NICStateFlags holds information about the state of an NIC. 1085 type NICStateFlags struct { 1086 // Up indicates whether the interface is running. 1087 Up bool 1088 1089 // Running indicates whether resources are allocated. 1090 Running bool 1091 1092 // Promiscuous indicates whether the interface is in promiscuous mode. 1093 Promiscuous bool 1094 1095 // Loopback indicates whether the interface is a loopback. 1096 Loopback bool 1097 } 1098 1099 // AddProtocolAddress adds an address to the specified NIC, possibly with extra 1100 // properties. 
1101 func (s *Stack) AddProtocolAddress(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress, properties AddressProperties) tcpip.Error { 1102 s.mu.RLock() 1103 defer s.mu.RUnlock() 1104 1105 nic, ok := s.nics[id] 1106 if !ok { 1107 return &tcpip.ErrUnknownNICID{} 1108 } 1109 1110 return nic.addAddress(protocolAddress, properties) 1111 } 1112 1113 // RemoveAddress removes an existing network-layer address from the specified 1114 // NIC. 1115 func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) tcpip.Error { 1116 s.mu.RLock() 1117 defer s.mu.RUnlock() 1118 1119 if nic, ok := s.nics[id]; ok { 1120 return nic.removeAddress(addr) 1121 } 1122 1123 return &tcpip.ErrUnknownNICID{} 1124 } 1125 1126 // SetAddressLifetimes sets informational preferred and valid lifetimes, and 1127 // whether the address should be preferred or deprecated. 1128 func (s *Stack) SetAddressLifetimes(id tcpip.NICID, addr tcpip.Address, lifetimes AddressLifetimes) tcpip.Error { 1129 s.mu.RLock() 1130 defer s.mu.RUnlock() 1131 1132 if nic, ok := s.nics[id]; ok { 1133 return nic.setAddressLifetimes(addr, lifetimes) 1134 } 1135 1136 return &tcpip.ErrUnknownNICID{} 1137 } 1138 1139 // AllAddresses returns a map of NICIDs to their protocol addresses (primary 1140 // and non-primary). 1141 func (s *Stack) AllAddresses() map[tcpip.NICID][]tcpip.ProtocolAddress { 1142 s.mu.RLock() 1143 defer s.mu.RUnlock() 1144 1145 nics := make(map[tcpip.NICID][]tcpip.ProtocolAddress) 1146 for id, nic := range s.nics { 1147 nics[id] = nic.allPermanentAddresses() 1148 } 1149 return nics 1150 } 1151 1152 // GetMainNICAddress returns the first non-deprecated primary address and prefix 1153 // for the given NIC and protocol. If no non-deprecated primary addresses exist, 1154 // a deprecated address will be returned. If no deprecated addresses exist, the 1155 // zero value will be returned. 
1156 func (s *Stack) GetMainNICAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error) { 1157 s.mu.RLock() 1158 defer s.mu.RUnlock() 1159 1160 nic, ok := s.nics[id] 1161 if !ok { 1162 return tcpip.AddressWithPrefix{}, &tcpip.ErrUnknownNICID{} 1163 } 1164 1165 return nic.PrimaryAddress(protocol) 1166 } 1167 1168 func (s *Stack) getAddressEP(nic *nic, localAddr, remoteAddr, srcHint tcpip.Address, netProto tcpip.NetworkProtocolNumber) AssignableAddressEndpoint { 1169 if localAddr.BitLen() == 0 { 1170 return nic.primaryEndpoint(netProto, remoteAddr, srcHint) 1171 } 1172 return nic.findEndpoint(netProto, localAddr, CanBePrimaryEndpoint) 1173 } 1174 1175 // NewRouteForMulticast returns a Route that may be used to forward multicast 1176 // packets. 1177 // 1178 // Returns nil if validation fails. 1179 func (s *Stack) NewRouteForMulticast(nicID tcpip.NICID, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { 1180 s.mu.RLock() 1181 defer s.mu.RUnlock() 1182 1183 nic, ok := s.nics[nicID] 1184 if !ok || !nic.Enabled() { 1185 return nil 1186 } 1187 1188 if addressEndpoint := s.getAddressEP(nic, tcpip.Address{} /* localAddr */, remoteAddr, tcpip.Address{} /* srcHint */, netProto); addressEndpoint != nil { 1189 return constructAndValidateRoute(netProto, addressEndpoint, nic, nic, tcpip.Address{} /* gateway */, tcpip.Address{} /* localAddr */, remoteAddr, s.handleLocal, false /* multicastLoop */) 1190 } 1191 return nil 1192 } 1193 1194 // findLocalRouteFromNICRLocked is like findLocalRouteRLocked but finds a route 1195 // from the specified NIC. 
1196 // 1197 // +checklocksread:s.mu 1198 func (s *Stack) findLocalRouteFromNICRLocked(localAddressNIC *nic, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { 1199 localAddressEndpoint := localAddressNIC.getAddressOrCreateTempInner(netProto, localAddr, false /* createTemp */, NeverPrimaryEndpoint) 1200 if localAddressEndpoint == nil { 1201 return nil 1202 } 1203 1204 var outgoingNIC *nic 1205 // Prefer a local route to the same interface as the local address. 1206 if localAddressNIC.hasAddress(netProto, remoteAddr) { 1207 outgoingNIC = localAddressNIC 1208 } 1209 1210 // If the remote address isn't owned by the local address's NIC, check all 1211 // NICs. 1212 if outgoingNIC == nil { 1213 for _, nic := range s.nics { 1214 if nic.hasAddress(netProto, remoteAddr) { 1215 outgoingNIC = nic 1216 break 1217 } 1218 } 1219 } 1220 1221 // If the remote address is not owned by the stack, we can't return a local 1222 // route. 1223 if outgoingNIC == nil { 1224 localAddressEndpoint.DecRef() 1225 return nil 1226 } 1227 1228 r := makeLocalRoute( 1229 netProto, 1230 localAddr, 1231 remoteAddr, 1232 outgoingNIC, 1233 localAddressNIC, 1234 localAddressEndpoint, 1235 ) 1236 1237 if r.IsOutboundBroadcast() { 1238 r.Release() 1239 return nil 1240 } 1241 1242 return r 1243 } 1244 1245 // findLocalRouteRLocked returns a local route. 1246 // 1247 // A local route is a route to some remote address which the stack owns. That 1248 // is, a local route is a route where packets never have to leave the stack. 
1249 // 1250 // +checklocksread:s.mu 1251 func (s *Stack) findLocalRouteRLocked(localAddressNICID tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { 1252 if localAddr.BitLen() == 0 { 1253 localAddr = remoteAddr 1254 } 1255 1256 if localAddressNICID == 0 { 1257 for _, localAddressNIC := range s.nics { 1258 if r := s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto); r != nil { 1259 return r 1260 } 1261 } 1262 1263 return nil 1264 } 1265 1266 if localAddressNIC, ok := s.nics[localAddressNICID]; ok { 1267 return s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto) 1268 } 1269 1270 return nil 1271 } 1272 1273 // HandleLocal returns true if non-loopback interfaces are allowed to loop packets. 1274 func (s *Stack) HandleLocal() bool { 1275 return s.handleLocal 1276 } 1277 1278 func isNICForwarding(nic *nic, proto tcpip.NetworkProtocolNumber) bool { 1279 switch forwarding, err := nic.forwarding(proto); err.(type) { 1280 case nil: 1281 return forwarding 1282 case *tcpip.ErrUnknownProtocol: 1283 panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nic.ID())) 1284 case *tcpip.ErrNotSupported: 1285 // Not all network protocols support forwarding. 1286 return false 1287 default: 1288 panic(fmt.Sprintf("nic(id=%d).forwarding(%d): %s", nic.ID(), proto, err)) 1289 } 1290 } 1291 1292 // findRouteWithLocalAddrFromAnyInterfaceRLocked returns a route to the given 1293 // destination address, leaving through the given NIC. 1294 // 1295 // Rather than preferring to find a route that uses a local address assigned to 1296 // the outgoing interface, it finds any NIC that holds a matching local address 1297 // endpoint. 
//
// +checklocksread:s.mu
func (s *Stack) findRouteWithLocalAddrFromAnyInterfaceRLocked(outgoingNIC *nic, localAddr, remoteAddr, srcHint, gateway tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) *Route {
	// Try every NIC as the holder of the local address; the first one that
	// yields a valid route through outgoingNIC wins.
	for _, aNIC := range s.nics {
		addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, srcHint, netProto)
		if addressEndpoint == nil {
			continue
		}

		if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, outgoingNIC, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
			return r
		}
	}
	return nil
}

// FindRoute creates a route to the given destination address, leaving through
// the given NIC and local address (if provided).
//
// If a NIC is not specified, the returned route will leave through the same
// NIC as the NIC that has the local address assigned when forwarding is
// disabled. If forwarding is enabled and the NIC is unspecified, the route may
// leave through any interface unless the route is link-local.
//
// If no local address is provided, the stack will select a local address. If no
// remote address is provided, the stack will use a remote address equal to the
// local address.
//
// The lookup proceeds in this order: a local (in-stack) route when handleLocal
// is set, a direct route through the requested NIC for destinations that need
// no routing, the route table, and finally the first forwarding-capable route
// remembered while scanning the table.
func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) (*Route, tcpip.Error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// Reject attempts to use unsupported protocols.
	if !s.CheckNetworkProtocol(netProto) {
		return nil, &tcpip.ErrUnknownProtocol{}
	}

	// Classify the destination. Broadcast, multicast, link-local and loopback
	// destinations do not need a route table lookup (and hence no gateway).
	isLinkLocal := header.IsV6LinkLocalUnicastAddress(remoteAddr) || header.IsV6LinkLocalMulticastAddress(remoteAddr)
	isLocalBroadcast := remoteAddr == header.IPv4Broadcast
	isMulticast := header.IsV4MulticastAddress(remoteAddr) || header.IsV6MulticastAddress(remoteAddr)
	isLoopback := header.IsV4LoopbackAddress(remoteAddr) || header.IsV6LoopbackAddress(remoteAddr)
	needRoute := !(isLocalBroadcast || isMulticast || isLinkLocal || isLoopback)

	// When the stack handles local traffic itself, first try a route that
	// never leaves the stack.
	if s.handleLocal && !isMulticast && !isLocalBroadcast {
		if r := s.findLocalRouteRLocked(id, localAddr, remoteAddr, netProto); r != nil {
			return r, nil
		}
	}

	// If the interface is specified and we do not need a route, return a route
	// through the interface if the interface is valid and enabled.
	if id != 0 && !needRoute {
		if nic, ok := s.nics[id]; ok && nic.Enabled() {
			if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, tcpip.Address{} /* srcHint */, netProto); addressEndpoint != nil {
				return makeRoute(
					netProto,
					tcpip.Address{}, /* gateway */
					localAddr,
					remoteAddr,
					nic, /* outboundNIC */
					nic, /* localAddressNIC*/
					addressEndpoint,
					s.handleLocal,
					multicastLoop,
				), nil
			}
		}

		if isLoopback {
			return nil, &tcpip.ErrBadLocalAddress{}
		}
		return nil, &tcpip.ErrNetworkUnreachable{}
	}

	onlyGlobalAddresses := !header.IsV6LinkLocalUnicastAddress(localAddr) && !isLinkLocal

	// Find a route to the remote with the route table.
	//
	// The scan runs inside a closure so that routeMu is released (via defer)
	// as soon as the scan finishes, before the fallback logic below.
	var chosenRoute tcpip.Route
	if r := func() *Route {
		s.routeMu.RLock()
		defer s.routeMu.RUnlock()

		for _, route := range s.routeTable {
			// Skip table entries that do not cover the destination.
			if remoteAddr.BitLen() != 0 && !route.Destination.Contains(remoteAddr) {
				continue
			}

			// Skip entries whose NIC is gone or disabled.
			nic, ok := s.nics[route.NIC]
			if !ok || !nic.Enabled() {
				continue
			}

			if id == 0 || id == route.NIC {
				if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, route.SourceHint, netProto); addressEndpoint != nil {
					var gateway tcpip.Address
					if needRoute {
						gateway = route.Gateway
					}
					r := constructAndValidateRoute(netProto, addressEndpoint, nic /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop)
					if r == nil {
						panic(fmt.Sprintf("non-forwarding route validation failed with route table entry = %#v, id = %d, localAddr = %s, remoteAddr = %s", route, id, localAddr, remoteAddr))
					}
					return r
				}
			}

			// If the stack has forwarding enabled, we haven't found a valid route to
			// the remote address yet, and we are routing locally generated traffic,
			// keep track of the first valid route. We keep iterating because we
			// prefer routes that let us use a local address that is assigned to the
			// outgoing interface. There is no requirement to do this from any RFC
			// but simply a choice made to better follow a strong host model which
			// the netstack follows at the time of writing.
			//
			// Note that for incoming traffic that we are forwarding (for which the
			// NIC and local address are unspecified), we do not keep iterating, as
			// there is no reason to prefer routes that let us use a local address
			// when routing forwarded (as opposed to locally-generated) traffic.
			locallyGenerated := (id != 0 || localAddr != tcpip.Address{})
			if onlyGlobalAddresses && chosenRoute.Equal(tcpip.Route{}) && isNICForwarding(nic, netProto) {
				if locallyGenerated {
					chosenRoute = route
					continue
				}
				if r := s.findRouteWithLocalAddrFromAnyInterfaceRLocked(nic, localAddr, remoteAddr, route.SourceHint, route.Gateway, netProto, multicastLoop); r != nil {
					return r
				}
			}
		}

		return nil
	}(); r != nil {
		return r, nil
	}

	if !chosenRoute.Equal(tcpip.Route{}) {
		// At this point we know the stack has forwarding enabled since chosenRoute is
		// only set when forwarding is enabled.
		nic, ok := s.nics[chosenRoute.NIC]
		if !ok {
			// If the route's NIC was invalid, we should not have chosen the route.
			panic(fmt.Sprintf("chosen route must have a valid NIC with ID = %d", chosenRoute.NIC))
		}

		var gateway tcpip.Address
		if needRoute {
			gateway = chosenRoute.Gateway
		}

		// Use the specified NIC to get the local address endpoint.
		if id != 0 {
			if aNIC, ok := s.nics[id]; ok {
				if addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, chosenRoute.SourceHint, netProto); addressEndpoint != nil {
					if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
						return r, nil
					}
				}
			}

			// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
			return nil, &tcpip.ErrHostUnreachable{}
		}

		if id == 0 {
			// If an interface is not specified, try to find a NIC that holds the local
			// address endpoint to construct a route.
			if r := s.findRouteWithLocalAddrFromAnyInterfaceRLocked(nic, localAddr, remoteAddr, chosenRoute.SourceHint, gateway, netProto, multicastLoop); r != nil {
				return r, nil
			}
		}
	}

	if needRoute {
		// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
		return nil, &tcpip.ErrHostUnreachable{}
	}
	if header.IsV6LoopbackAddress(remoteAddr) {
		return nil, &tcpip.ErrBadLocalAddress{}
	}
	// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
	// NOTE(review): the value returned here already is ErrNetworkUnreachable, so
	// the TODO above looks stale for this particular return; confirm against the
	// linked issue.
	return nil, &tcpip.ErrNetworkUnreachable{}
}

// CheckNetworkProtocol checks if a given network protocol is enabled in the
// stack.
func (s *Stack) CheckNetworkProtocol(protocol tcpip.NetworkProtocolNumber) bool {
	_, ok := s.networkProtocols[protocol]
	return ok
}

// CheckDuplicateAddress performs duplicate address detection for the address on
// the specified interface.
//
// The NIC is looked up under s.mu, but the check itself runs after the lock
// has been released.
func (s *Stack) CheckDuplicateAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, h DADCompletionHandler) (DADCheckAddressDisposition, tcpip.Error) {
	s.mu.RLock()
	nic, ok := s.nics[nicID]
	s.mu.RUnlock()

	if !ok {
		return 0, &tcpip.ErrUnknownNICID{}
	}

	return nic.checkDuplicateAddress(protocol, addr, h)
}

// CheckLocalAddress determines if the given local address exists, and if it
// does, returns the id of the NIC it's bound to. Returns 0 if the address
// does not exist.
func (s *Stack) CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// If a NIC is specified, use its NIC id.
	if nicID != 0 {
		nic, ok := s.nics[nicID]
		if !ok {
			return 0
		}
		// In IPv4, linux only checks the interface. If it matches, then it does
		// not bother with the address.
		// https://github.com/torvalds/linux/blob/15205c2829ca2cbb5ece5ceaafe1171a8470e62b/net/ipv4/igmp.c#L1829-L1837
		if protocol == header.IPv4ProtocolNumber {
			return nic.id
		}
		if nic.CheckLocalAddress(protocol, addr) {
			return nic.id
		}
		return 0
	}

	// Go through all the NICs.
	for _, nic := range s.nics {
		if nic.CheckLocalAddress(protocol, addr) {
			return nic.id
		}
	}

	return 0
}

// SetPromiscuousMode enables or disables promiscuous mode in the given NIC.
func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) tcpip.Error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	nic, ok := s.nics[nicID]
	if !ok {
		return &tcpip.ErrUnknownNICID{}
	}

	nic.setPromiscuousMode(enable)

	return nil
}

// SetSpoofing enables or disables address spoofing in the given NIC, allowing
// endpoints to bind to any address in the NIC.
func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) tcpip.Error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	nic, ok := s.nics[nicID]
	if !ok {
		return &tcpip.ErrUnknownNICID{}
	}

	nic.setSpoofing(enable)

	return nil
}

// LinkResolutionResult is the result of a link address resolution attempt.
type LinkResolutionResult struct {
	// LinkAddress is the link address produced by the attempt.
	LinkAddress tcpip.LinkAddress
	// Err is the error reported by the attempt, if any.
	Err tcpip.Error
}

// GetLinkAddress finds the link address corresponding to a network address.
//
// Returns ErrNotSupported if the stack is not configured with a link address
// resolver for the specified network protocol.
//
// Returns ErrWouldBlock if the link address is not readily available, along
// with a notification channel for the caller to block on. Triggers address
// resolution asynchronously.
1581 // 1582 // onResolve will be called either immediately, if resolution is not required, 1583 // or when address resolution is complete, with the resolved link address and 1584 // whether resolution succeeded. 1585 // 1586 // If specified, the local address must be an address local to the interface 1587 // the neighbor cache belongs to. The local address is the source address of 1588 // a packet prompting NUD/link address resolution. 1589 func (s *Stack) GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, onResolve func(LinkResolutionResult)) tcpip.Error { 1590 s.mu.RLock() 1591 nic, ok := s.nics[nicID] 1592 s.mu.RUnlock() 1593 if !ok { 1594 return &tcpip.ErrUnknownNICID{} 1595 } 1596 1597 return nic.getLinkAddress(addr, localAddr, protocol, onResolve) 1598 } 1599 1600 // Neighbors returns all IP to MAC address associations. 1601 func (s *Stack) Neighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) ([]NeighborEntry, tcpip.Error) { 1602 s.mu.RLock() 1603 nic, ok := s.nics[nicID] 1604 s.mu.RUnlock() 1605 1606 if !ok { 1607 return nil, &tcpip.ErrUnknownNICID{} 1608 } 1609 1610 return nic.neighbors(protocol) 1611 } 1612 1613 // AddStaticNeighbor statically associates an IP address to a MAC address. 1614 func (s *Stack) AddStaticNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress) tcpip.Error { 1615 s.mu.RLock() 1616 nic, ok := s.nics[nicID] 1617 s.mu.RUnlock() 1618 1619 if !ok { 1620 return &tcpip.ErrUnknownNICID{} 1621 } 1622 1623 return nic.addStaticNeighbor(addr, protocol, linkAddr) 1624 } 1625 1626 // RemoveNeighbor removes an IP to MAC address association previously created 1627 // either automatically or by AddStaticNeighbor. Returns ErrBadAddress if there 1628 // is no association with the provided address. 
1629 func (s *Stack) RemoveNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error { 1630 s.mu.RLock() 1631 nic, ok := s.nics[nicID] 1632 s.mu.RUnlock() 1633 1634 if !ok { 1635 return &tcpip.ErrUnknownNICID{} 1636 } 1637 1638 return nic.removeNeighbor(protocol, addr) 1639 } 1640 1641 // ClearNeighbors removes all IP to MAC address associations. 1642 func (s *Stack) ClearNeighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) tcpip.Error { 1643 s.mu.RLock() 1644 nic, ok := s.nics[nicID] 1645 s.mu.RUnlock() 1646 1647 if !ok { 1648 return &tcpip.ErrUnknownNICID{} 1649 } 1650 1651 return nic.clearNeighbors(protocol) 1652 } 1653 1654 // RegisterTransportEndpoint registers the given endpoint with the stack 1655 // transport dispatcher. Received packets that match the provided id will be 1656 // delivered to the given endpoint; specifying a nic is optional, but 1657 // nic-specific IDs have precedence over global ones. 1658 func (s *Stack) RegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { 1659 return s.demux.registerEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) 1660 } 1661 1662 // CheckRegisterTransportEndpoint checks if an endpoint can be registered with 1663 // the stack transport dispatcher. 1664 func (s *Stack) CheckRegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { 1665 return s.demux.checkEndpoint(netProtos, protocol, id, flags, bindToDevice) 1666 } 1667 1668 // UnregisterTransportEndpoint removes the endpoint with the given id from the 1669 // stack transport dispatcher. 
1670 func (s *Stack) UnregisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) { 1671 s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) 1672 } 1673 1674 // StartTransportEndpointCleanup removes the endpoint with the given id from 1675 // the stack transport dispatcher. It also transitions it to the cleanup stage. 1676 func (s *Stack) StartTransportEndpointCleanup(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) { 1677 s.cleanupEndpointsMu.Lock() 1678 s.cleanupEndpoints[ep] = struct{}{} 1679 s.cleanupEndpointsMu.Unlock() 1680 1681 s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) 1682 } 1683 1684 // CompleteTransportEndpointCleanup removes the endpoint from the cleanup 1685 // stage. 1686 func (s *Stack) CompleteTransportEndpointCleanup(ep TransportEndpoint) { 1687 s.cleanupEndpointsMu.Lock() 1688 delete(s.cleanupEndpoints, ep) 1689 s.cleanupEndpointsMu.Unlock() 1690 } 1691 1692 // FindTransportEndpoint finds an endpoint that most closely matches the provided 1693 // id. If no endpoint is found it returns nil. 1694 func (s *Stack) FindTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, nicID tcpip.NICID) TransportEndpoint { 1695 return s.demux.findTransportEndpoint(netProto, transProto, id, nicID) 1696 } 1697 1698 // RegisterRawTransportEndpoint registers the given endpoint with the stack 1699 // transport dispatcher. Received packets that match the provided transport 1700 // protocol will be delivered to the given endpoint. 
1701 func (s *Stack) RegisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) tcpip.Error { 1702 return s.demux.registerRawEndpoint(netProto, transProto, ep) 1703 } 1704 1705 // UnregisterRawTransportEndpoint removes the endpoint for the transport 1706 // protocol from the stack transport dispatcher. 1707 func (s *Stack) UnregisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) { 1708 s.demux.unregisterRawEndpoint(netProto, transProto, ep) 1709 } 1710 1711 // RegisterRestoredEndpoint records e as an endpoint that has been restored on 1712 // this stack. 1713 func (s *Stack) RegisterRestoredEndpoint(e RestoredEndpoint) { 1714 s.mu.Lock() 1715 defer s.mu.Unlock() 1716 1717 s.restoredEndpoints = append(s.restoredEndpoints, e) 1718 } 1719 1720 // RegisterResumableEndpoint records e as an endpoint that has to be resumed. 1721 func (s *Stack) RegisterResumableEndpoint(e ResumableEndpoint) { 1722 s.mu.Lock() 1723 defer s.mu.Unlock() 1724 1725 s.resumableEndpoints = append(s.resumableEndpoints, e) 1726 } 1727 1728 // RegisteredEndpoints returns all endpoints which are currently registered. 1729 func (s *Stack) RegisteredEndpoints() []TransportEndpoint { 1730 s.mu.Lock() 1731 defer s.mu.Unlock() 1732 1733 var es []TransportEndpoint 1734 for _, e := range s.demux.protocol { 1735 es = append(es, e.transportEndpoints()...) 1736 } 1737 return es 1738 } 1739 1740 // CleanupEndpoints returns endpoints currently in the cleanup state. 1741 func (s *Stack) CleanupEndpoints() []TransportEndpoint { 1742 s.cleanupEndpointsMu.Lock() 1743 defer s.cleanupEndpointsMu.Unlock() 1744 1745 es := make([]TransportEndpoint, 0, len(s.cleanupEndpoints)) 1746 for e := range s.cleanupEndpoints { 1747 es = append(es, e) 1748 } 1749 return es 1750 } 1751 1752 // RestoreCleanupEndpoints adds endpoints to cleanup tracking. 
This is useful 1753 // for restoring a stack after a save. 1754 func (s *Stack) RestoreCleanupEndpoints(es []TransportEndpoint) { 1755 s.cleanupEndpointsMu.Lock() 1756 defer s.cleanupEndpointsMu.Unlock() 1757 1758 for _, e := range es { 1759 s.cleanupEndpoints[e] = struct{}{} 1760 } 1761 } 1762 1763 // Close closes all currently registered transport endpoints. 1764 // 1765 // Endpoints created or modified during this call may not get closed. 1766 func (s *Stack) Close() { 1767 for _, e := range s.RegisteredEndpoints() { 1768 e.Abort() 1769 } 1770 for _, p := range s.transportProtocols { 1771 p.proto.Close() 1772 } 1773 for _, p := range s.networkProtocols { 1774 p.Close() 1775 } 1776 } 1777 1778 // Wait waits for all transport and link endpoints to halt their worker 1779 // goroutines. 1780 // 1781 // Endpoints created or modified during this call may not get waited on. 1782 // 1783 // Note that link endpoints must be stopped via an implementation specific 1784 // mechanism. 1785 func (s *Stack) Wait() { 1786 for _, e := range s.RegisteredEndpoints() { 1787 e.Wait() 1788 } 1789 for _, e := range s.CleanupEndpoints() { 1790 e.Wait() 1791 } 1792 for _, p := range s.transportProtocols { 1793 p.proto.Wait() 1794 } 1795 for _, p := range s.networkProtocols { 1796 p.Wait() 1797 } 1798 1799 s.mu.Lock() 1800 defer s.mu.Unlock() 1801 1802 for id, n := range s.nics { 1803 // Remove NIC to ensure that qDisc goroutines are correctly 1804 // terminated on stack teardown. 1805 s.removeNICLocked(id) 1806 n.NetworkLinkEndpoint.Wait() 1807 } 1808 } 1809 1810 // Destroy destroys the stack with all endpoints. 1811 func (s *Stack) Destroy() { 1812 s.Close() 1813 s.Wait() 1814 } 1815 1816 // Pause pauses any protocol level background workers. 1817 func (s *Stack) Pause() { 1818 for _, p := range s.transportProtocols { 1819 p.proto.Pause() 1820 } 1821 } 1822 1823 // Restore restarts the stack after a restore. This must be called after the 1824 // entire system has been restored. 
1825 func (s *Stack) Restore() { 1826 // RestoredEndpoint.Restore() may call other methods on s, so we can't hold 1827 // s.mu while restoring the endpoints. 1828 s.mu.Lock() 1829 eps := s.restoredEndpoints 1830 s.restoredEndpoints = nil 1831 s.mu.Unlock() 1832 for _, e := range eps { 1833 e.Restore(s) 1834 } 1835 // Now resume any protocol level background workers. 1836 for _, p := range s.transportProtocols { 1837 p.proto.Resume() 1838 } 1839 } 1840 1841 // Resume resumes the stack after a save. 1842 func (s *Stack) Resume() { 1843 s.mu.Lock() 1844 eps := s.resumableEndpoints 1845 s.resumableEndpoints = nil 1846 s.mu.Unlock() 1847 for _, e := range eps { 1848 e.Resume() 1849 } 1850 // Now resume any protocol level background workers. 1851 for _, p := range s.transportProtocols { 1852 p.proto.Resume() 1853 } 1854 } 1855 1856 // RegisterPacketEndpoint registers ep with the stack, causing it to receive 1857 // all traffic of the specified netProto on the given NIC. If nicID is 0, it 1858 // receives traffic from every NIC. 1859 func (s *Stack) RegisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) tcpip.Error { 1860 s.mu.Lock() 1861 defer s.mu.Unlock() 1862 1863 // If no NIC is specified, capture on all devices. 1864 if nicID == 0 { 1865 // Register with each NIC. 1866 for _, nic := range s.nics { 1867 nic.registerPacketEndpoint(netProto, ep) 1868 } 1869 return nil 1870 } 1871 1872 // Capture on a specific device. 1873 nic, ok := s.nics[nicID] 1874 if !ok { 1875 return &tcpip.ErrUnknownNICID{} 1876 } 1877 nic.registerPacketEndpoint(netProto, ep) 1878 1879 return nil 1880 } 1881 1882 // UnregisterPacketEndpoint unregisters ep for packets of the specified 1883 // netProto from the specified NIC. If nicID is 0, ep is unregistered from all 1884 // NICs. 
1885 func (s *Stack) UnregisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) { 1886 s.mu.Lock() 1887 defer s.mu.Unlock() 1888 s.unregisterPacketEndpointLocked(nicID, netProto, ep) 1889 } 1890 1891 // +checklocks:s.mu 1892 func (s *Stack) unregisterPacketEndpointLocked(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) { 1893 // If no NIC is specified, unregister on all devices. 1894 if nicID == 0 { 1895 // Unregister with each NIC. 1896 for _, nic := range s.nics { 1897 nic.unregisterPacketEndpoint(netProto, ep) 1898 } 1899 return 1900 } 1901 1902 // Unregister in a single device. 1903 nic, ok := s.nics[nicID] 1904 if !ok { 1905 return 1906 } 1907 nic.unregisterPacketEndpoint(netProto, ep) 1908 } 1909 1910 // WritePacketToRemote writes a payload on the specified NIC using the provided 1911 // network protocol and remote link address. 1912 func (s *Stack) WritePacketToRemote(nicID tcpip.NICID, remote tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, payload buffer.Buffer) tcpip.Error { 1913 s.mu.Lock() 1914 nic, ok := s.nics[nicID] 1915 s.mu.Unlock() 1916 if !ok { 1917 return &tcpip.ErrUnknownDevice{} 1918 } 1919 pkt := NewPacketBuffer(PacketBufferOptions{ 1920 ReserveHeaderBytes: int(nic.MaxHeaderLength()), 1921 Payload: payload, 1922 }) 1923 defer pkt.DecRef() 1924 pkt.NetworkProtocolNumber = netProto 1925 return nic.WritePacketToRemote(remote, pkt) 1926 } 1927 1928 // WriteRawPacket writes data directly to the specified NIC without adding any 1929 // headers. 
1930 func (s *Stack) WriteRawPacket(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber, payload buffer.Buffer) tcpip.Error { 1931 s.mu.RLock() 1932 nic, ok := s.nics[nicID] 1933 s.mu.RUnlock() 1934 if !ok { 1935 return &tcpip.ErrUnknownNICID{} 1936 } 1937 1938 pkt := NewPacketBuffer(PacketBufferOptions{ 1939 Payload: payload, 1940 }) 1941 defer pkt.DecRef() 1942 pkt.NetworkProtocolNumber = proto 1943 return nic.writeRawPacketWithLinkHeaderInPayload(pkt) 1944 } 1945 1946 // NetworkProtocolInstance returns the protocol instance in the stack for the 1947 // specified network protocol. This method is public for protocol implementers 1948 // and tests to use. 1949 func (s *Stack) NetworkProtocolInstance(num tcpip.NetworkProtocolNumber) NetworkProtocol { 1950 if p, ok := s.networkProtocols[num]; ok { 1951 return p 1952 } 1953 return nil 1954 } 1955 1956 // TransportProtocolInstance returns the protocol instance in the stack for the 1957 // specified transport protocol. This method is public for protocol implementers 1958 // and tests to use. 1959 func (s *Stack) TransportProtocolInstance(num tcpip.TransportProtocolNumber) TransportProtocol { 1960 if pState, ok := s.transportProtocols[num]; ok { 1961 return pState.proto 1962 } 1963 return nil 1964 } 1965 1966 // AddTCPProbe installs a probe function that will be invoked on every segment 1967 // received by a given TCP endpoint. The probe function is passed a copy of the 1968 // TCP endpoint state before and after processing of the segment. 1969 // 1970 // NOTE: TCPProbe is added only to endpoints created after this call. Endpoints 1971 // created prior to this call will not call the probe function. 1972 // 1973 // Further, installing two different probes back to back can result in some 1974 // endpoints calling the first one and some the second one. There is no 1975 // guarantee provided on which probe will be invoked. Ideally this should only 1976 // be called once per stack. 
1977 func (s *Stack) AddTCPProbe(probe TCPProbeFunc) { 1978 s.tcpProbeFunc.Store(probe) 1979 } 1980 1981 // GetTCPProbe returns the TCPProbeFunc if installed with AddTCPProbe, nil 1982 // otherwise. 1983 func (s *Stack) GetTCPProbe() TCPProbeFunc { 1984 p := s.tcpProbeFunc.Load() 1985 if p == nil { 1986 return nil 1987 } 1988 return p.(TCPProbeFunc) 1989 } 1990 1991 // RemoveTCPProbe removes an installed TCP probe. 1992 // 1993 // NOTE: This only ensures that endpoints created after this call do not 1994 // have a probe attached. Endpoints already created will continue to invoke 1995 // TCP probe. 1996 func (s *Stack) RemoveTCPProbe() { 1997 // This must be TCPProbeFunc(nil) because atomic.Value.Store(nil) panics. 1998 s.tcpProbeFunc.Store(TCPProbeFunc(nil)) 1999 } 2000 2001 // JoinGroup joins the given multicast group on the given NIC. 2002 func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error { 2003 s.mu.RLock() 2004 defer s.mu.RUnlock() 2005 2006 if nic, ok := s.nics[nicID]; ok { 2007 return nic.joinGroup(protocol, multicastAddr) 2008 } 2009 return &tcpip.ErrUnknownNICID{} 2010 } 2011 2012 // LeaveGroup leaves the given multicast group on the given NIC. 2013 func (s *Stack) LeaveGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error { 2014 s.mu.RLock() 2015 defer s.mu.RUnlock() 2016 2017 if nic, ok := s.nics[nicID]; ok { 2018 return nic.leaveGroup(protocol, multicastAddr) 2019 } 2020 return &tcpip.ErrUnknownNICID{} 2021 } 2022 2023 // IsInGroup returns true if the NIC with ID nicID has joined the multicast 2024 // group multicastAddr. 
2025 func (s *Stack) IsInGroup(nicID tcpip.NICID, multicastAddr tcpip.Address) (bool, tcpip.Error) { 2026 s.mu.RLock() 2027 defer s.mu.RUnlock() 2028 2029 if nic, ok := s.nics[nicID]; ok { 2030 return nic.isInGroup(multicastAddr), nil 2031 } 2032 return false, &tcpip.ErrUnknownNICID{} 2033 } 2034 2035 // IPTables returns the stack's iptables. 2036 func (s *Stack) IPTables() *IPTables { 2037 return s.tables 2038 } 2039 2040 // ICMPLimit returns the maximum number of ICMP messages that can be sent 2041 // in one second. 2042 func (s *Stack) ICMPLimit() rate.Limit { 2043 return s.icmpRateLimiter.Limit() 2044 } 2045 2046 // SetICMPLimit sets the maximum number of ICMP messages that be sent 2047 // in one second. 2048 func (s *Stack) SetICMPLimit(newLimit rate.Limit) { 2049 s.icmpRateLimiter.SetLimit(newLimit) 2050 } 2051 2052 // ICMPBurst returns the maximum number of ICMP messages that can be sent 2053 // in a single burst. 2054 func (s *Stack) ICMPBurst() int { 2055 return s.icmpRateLimiter.Burst() 2056 } 2057 2058 // SetICMPBurst sets the maximum number of ICMP messages that can be sent 2059 // in a single burst. 2060 func (s *Stack) SetICMPBurst(burst int) { 2061 s.icmpRateLimiter.SetBurst(burst) 2062 } 2063 2064 // AllowICMPMessage returns true if we the rate limiter allows at least one 2065 // ICMP message to be sent at this instant. 2066 func (s *Stack) AllowICMPMessage() bool { 2067 return s.icmpRateLimiter.Allow() 2068 } 2069 2070 // GetNetworkEndpoint returns the NetworkEndpoint with the specified protocol 2071 // number installed on the specified NIC. 2072 func (s *Stack) GetNetworkEndpoint(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NetworkEndpoint, tcpip.Error) { 2073 s.mu.Lock() 2074 defer s.mu.Unlock() 2075 2076 nic, ok := s.nics[nicID] 2077 if !ok { 2078 return nil, &tcpip.ErrUnknownNICID{} 2079 } 2080 2081 return nic.getNetworkEndpoint(proto), nil 2082 } 2083 2084 // NUDConfigurations gets the per-interface NUD configurations. 
2085 func (s *Stack) NUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NUDConfigurations, tcpip.Error) { 2086 s.mu.RLock() 2087 nic, ok := s.nics[id] 2088 s.mu.RUnlock() 2089 2090 if !ok { 2091 return NUDConfigurations{}, &tcpip.ErrUnknownNICID{} 2092 } 2093 2094 return nic.nudConfigs(proto) 2095 } 2096 2097 // SetNUDConfigurations sets the per-interface NUD configurations. 2098 // 2099 // Note, if c contains invalid NUD configuration values, it will be fixed to 2100 // use default values for the erroneous values. 2101 func (s *Stack) SetNUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber, c NUDConfigurations) tcpip.Error { 2102 s.mu.RLock() 2103 nic, ok := s.nics[id] 2104 s.mu.RUnlock() 2105 2106 if !ok { 2107 return &tcpip.ErrUnknownNICID{} 2108 } 2109 2110 return nic.setNUDConfigs(proto, c) 2111 } 2112 2113 // Seed returns a 32 bit value that can be used as a seed value. 2114 // 2115 // NOTE: The seed is generated once during stack initialization only. 2116 func (s *Stack) Seed() uint32 { 2117 return s.seed 2118 } 2119 2120 // InsecureRNG returns a reference to a pseudo random generator that can be used 2121 // to generate random numbers as required. It is not cryptographically secure 2122 // and should not be used for security sensitive work. 2123 func (s *Stack) InsecureRNG() *rand.Rand { 2124 return s.insecureRNG 2125 } 2126 2127 // SecureRNG returns the stack's cryptographically secure random number 2128 // generator. 2129 func (s *Stack) SecureRNG() cryptorand.RNG { 2130 return s.secureRNG 2131 } 2132 2133 // FindNICNameFromID returns the name of the NIC for the given NICID. 2134 func (s *Stack) FindNICNameFromID(id tcpip.NICID) string { 2135 s.mu.RLock() 2136 defer s.mu.RUnlock() 2137 2138 nic, ok := s.nics[id] 2139 if !ok { 2140 return "" 2141 } 2142 2143 return nic.Name() 2144 } 2145 2146 // ParseResult indicates the result of a parsing attempt. 
2147 type ParseResult int 2148 2149 const ( 2150 // ParsedOK indicates that a packet was successfully parsed. 2151 ParsedOK ParseResult = iota 2152 2153 // UnknownTransportProtocol indicates that the transport protocol is unknown. 2154 UnknownTransportProtocol 2155 2156 // TransportLayerParseError indicates that the transport packet was not 2157 // successfully parsed. 2158 TransportLayerParseError 2159 ) 2160 2161 // ParsePacketBufferTransport parses the provided packet buffer's transport 2162 // header. 2163 func (s *Stack) ParsePacketBufferTransport(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) ParseResult { 2164 pkt.TransportProtocolNumber = protocol 2165 // Parse the transport header if present. 2166 state, ok := s.transportProtocols[protocol] 2167 if !ok { 2168 return UnknownTransportProtocol 2169 } 2170 2171 if !state.proto.Parse(pkt) { 2172 return TransportLayerParseError 2173 } 2174 2175 return ParsedOK 2176 } 2177 2178 // networkProtocolNumbers returns the network protocol numbers the stack is 2179 // configured with. 2180 func (s *Stack) networkProtocolNumbers() []tcpip.NetworkProtocolNumber { 2181 protos := make([]tcpip.NetworkProtocolNumber, 0, len(s.networkProtocols)) 2182 for p := range s.networkProtocols { 2183 protos = append(protos, p) 2184 } 2185 return protos 2186 } 2187 2188 func isSubnetBroadcastOnNIC(nic *nic, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool { 2189 addressEndpoint := nic.getAddressOrCreateTempInner(protocol, addr, false /* createTemp */, NeverPrimaryEndpoint) 2190 if addressEndpoint == nil { 2191 return false 2192 } 2193 2194 subnet := addressEndpoint.Subnet() 2195 addressEndpoint.DecRef() 2196 return subnet.IsBroadcast(addr) 2197 } 2198 2199 // IsSubnetBroadcast returns true if the provided address is a subnet-local 2200 // broadcast address on the specified NIC and protocol. 2201 // 2202 // Returns false if the NIC is unknown or if the protocol is unknown or does 2203 // not support addressing. 
2204 // 2205 // If the NIC is not specified, the stack will check all NICs. 2206 func (s *Stack) IsSubnetBroadcast(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool { 2207 s.mu.RLock() 2208 defer s.mu.RUnlock() 2209 2210 if nicID != 0 { 2211 nic, ok := s.nics[nicID] 2212 if !ok { 2213 return false 2214 } 2215 2216 return isSubnetBroadcastOnNIC(nic, protocol, addr) 2217 } 2218 2219 for _, nic := range s.nics { 2220 if isSubnetBroadcastOnNIC(nic, protocol, addr) { 2221 return true 2222 } 2223 } 2224 2225 return false 2226 } 2227 2228 // PacketEndpointWriteSupported returns true iff packet endpoints support write 2229 // operations. 2230 func (s *Stack) PacketEndpointWriteSupported() bool { 2231 return s.packetEndpointWriteSupported 2232 }