github.com/noisysockets/netstack@v0.6.0/pkg/tcpip/stack/stack.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package stack provides the glue between networking protocols and the 16 // consumers of the networking stack. 17 // 18 // For consumers, the only function of interest is New(), everything else is 19 // provided by the tcpip/public package. 20 package stack 21 22 import ( 23 "encoding/binary" 24 "fmt" 25 "io" 26 "math/rand" 27 "sync/atomic" 28 "time" 29 30 "golang.org/x/time/rate" 31 "github.com/noisysockets/netstack/pkg/atomicbitops" 32 "github.com/noisysockets/netstack/pkg/buffer" 33 "github.com/noisysockets/netstack/pkg/log" 34 cryptorand "github.com/noisysockets/netstack/pkg/rand" 35 "github.com/noisysockets/netstack/pkg/tcpip" 36 "github.com/noisysockets/netstack/pkg/tcpip/header" 37 "github.com/noisysockets/netstack/pkg/tcpip/ports" 38 "github.com/noisysockets/netstack/pkg/waiter" 39 ) 40 41 const ( 42 // DefaultTOS is the default type of service value for network endpoints. 43 DefaultTOS = 0 44 ) 45 46 type transportProtocolState struct { 47 proto TransportProtocol 48 defaultHandler func(id TransportEndpointID, pkt *PacketBuffer) bool 49 } 50 51 // RestoredEndpoint is an endpoint that needs to be restored. 52 type RestoredEndpoint interface { 53 // Restore restores an endpoint. This can be used to restart background 54 // workers such as protocol goroutines. 
This must be called after all 55 // indirect dependencies of the endpoint has been restored, which 56 // generally implies at the end of the restore process. 57 Restore(*Stack) 58 } 59 60 // ResumableEndpoint is an endpoint that needs to be resumed after save. 61 type ResumableEndpoint interface { 62 // Resume resumes an endpoint. 63 Resume() 64 } 65 66 // uniqueIDGenerator is a default unique ID generator. 67 type uniqueIDGenerator atomicbitops.Uint64 68 69 func (u *uniqueIDGenerator) UniqueID() uint64 { 70 return ((*atomicbitops.Uint64)(u)).Add(1) 71 } 72 73 var netRawMissingLogger = log.BasicRateLimitedLogger(time.Minute) 74 75 // Stack is a networking stack, with all supported protocols, NICs, and route 76 // table. 77 // 78 // LOCK ORDERING: mu > routeMu. 79 type Stack struct { 80 transportProtocols map[tcpip.TransportProtocolNumber]*transportProtocolState 81 networkProtocols map[tcpip.NetworkProtocolNumber]NetworkProtocol 82 83 // rawFactory creates raw endpoints. If nil, raw endpoints are 84 // disabled. It is set during Stack creation and is immutable. 85 rawFactory RawFactory 86 packetEndpointWriteSupported bool 87 88 demux *transportDemuxer 89 90 stats tcpip.Stats 91 92 // routeMu protects annotated fields below. 93 routeMu routeStackRWMutex 94 95 // +checklocks:routeMu 96 routeTable []tcpip.Route 97 98 mu stackRWMutex 99 // +checklocks:mu 100 nics map[tcpip.NICID]*nic 101 defaultForwardingEnabled map[tcpip.NetworkProtocolNumber]struct{} 102 103 // cleanupEndpointsMu protects cleanupEndpoints. 104 cleanupEndpointsMu cleanupEndpointsMutex 105 // +checklocks:cleanupEndpointsMu 106 cleanupEndpoints map[TransportEndpoint]struct{} 107 108 *ports.PortManager 109 110 // If not nil, then any new endpoints will have this probe function 111 // invoked everytime they receive a TCP segment. 112 tcpProbeFunc atomic.Value // TCPProbeFunc 113 114 // clock is used to generate user-visible times. 
115 clock tcpip.Clock 116 117 // handleLocal allows non-loopback interfaces to loop packets. 118 handleLocal bool 119 120 // tables are the iptables packet filtering and manipulation rules. 121 // TODO(gvisor.dev/issue/4595): S/R this field. 122 tables *IPTables 123 124 // restoredEndpoints is a list of endpoints that need to be restored if the 125 // stack is being restored. 126 restoredEndpoints []RestoredEndpoint 127 128 // resumableEndpoints is a list of endpoints that need to be resumed 129 // after save. 130 resumableEndpoints []ResumableEndpoint 131 132 // icmpRateLimiter is a global rate limiter for all ICMP messages generated 133 // by the stack. 134 icmpRateLimiter *ICMPRateLimiter 135 136 // seed is a one-time random value initialized at stack startup. 137 // 138 // TODO(gvisor.dev/issue/940): S/R this field. 139 seed uint32 140 141 // nudConfigs is the default NUD configurations used by interfaces. 142 nudConfigs NUDConfigurations 143 144 // nudDisp is the NUD event dispatcher that is used to send the netstack 145 // integrator NUD related events. 146 nudDisp NUDDispatcher 147 148 // uniqueIDGenerator is a generator of unique identifiers. 149 uniqueIDGenerator UniqueID 150 151 // randomGenerator is an injectable pseudo random generator that can be 152 // used when a random number is required. It must not be used in 153 // security-sensitive contexts. 154 insecureRNG *rand.Rand 155 156 // secureRNG is a cryptographically secure random number generator. 157 secureRNG cryptorand.RNG 158 159 // sendBufferSize holds the min/default/max send buffer sizes for 160 // endpoints other than TCP. 161 sendBufferSize tcpip.SendBufferSizeOption 162 163 // receiveBufferSize holds the min/default/max receive buffer sizes for 164 // endpoints other than TCP. 
165 receiveBufferSize tcpip.ReceiveBufferSizeOption 166 167 // tcpInvalidRateLimit is the maximal rate for sending duplicate 168 // acknowledgements in response to incoming TCP packets that are for an existing 169 // connection but that are invalid due to any of the following reasons: 170 // 171 // a) out-of-window sequence number. 172 // b) out-of-window acknowledgement number. 173 // c) PAWS check failure (when implemented). 174 // 175 // This is required to prevent potential ACK loops. 176 // Setting this to 0 will disable all rate limiting. 177 tcpInvalidRateLimit time.Duration 178 179 // tsOffsetSecret is the secret key for generating timestamp offsets 180 // initialized at stack startup. 181 tsOffsetSecret uint32 182 } 183 184 // UniqueID is an abstract generator of unique identifiers. 185 type UniqueID interface { 186 UniqueID() uint64 187 } 188 189 // NetworkProtocolFactory instantiates a network protocol. 190 // 191 // NetworkProtocolFactory must not attempt to modify the stack, it may only 192 // query the stack. 193 type NetworkProtocolFactory func(*Stack) NetworkProtocol 194 195 // TransportProtocolFactory instantiates a transport protocol. 196 // 197 // TransportProtocolFactory must not attempt to modify the stack, it may only 198 // query the stack. 199 type TransportProtocolFactory func(*Stack) TransportProtocol 200 201 // Options contains optional Stack configuration. 202 type Options struct { 203 // NetworkProtocols lists the network protocols to enable. 204 NetworkProtocols []NetworkProtocolFactory 205 206 // TransportProtocols lists the transport protocols to enable. 207 TransportProtocols []TransportProtocolFactory 208 209 // Clock is an optional clock used for timekeeping. 210 // 211 // If Clock is nil, tcpip.NewStdClock() will be used. 212 Clock tcpip.Clock 213 214 // Stats are optional statistic counters. 
215 Stats tcpip.Stats 216 217 // HandleLocal indicates whether packets destined to their source 218 // should be handled by the stack internally (true) or outside the 219 // stack (false). 220 HandleLocal bool 221 222 // UniqueID is an optional generator of unique identifiers. 223 UniqueID UniqueID 224 225 // NUDConfigs is the default NUD configurations used by interfaces. 226 NUDConfigs NUDConfigurations 227 228 // NUDDisp is the NUD event dispatcher that an integrator can provide to 229 // receive NUD related events. 230 NUDDisp NUDDispatcher 231 232 // RawFactory produces raw endpoints. Raw endpoints are enabled only if 233 // this is non-nil. 234 RawFactory RawFactory 235 236 // AllowPacketEndpointWrite determines if packet endpoints support write 237 // operations. 238 AllowPacketEndpointWrite bool 239 240 // RandSource is an optional source to use to generate random 241 // numbers. If omitted it defaults to a Source seeded by the data 242 // returned by the stack secure RNG. 243 // 244 // RandSource must be thread-safe. 245 RandSource rand.Source 246 247 // IPTables are the initial iptables rules. If nil, DefaultIPTables will be 248 // used to construct the initial iptables rules. 249 // all traffic. 250 IPTables *IPTables 251 252 // DefaultIPTables is an optional iptables rules constructor that is called 253 // if IPTables is nil. If both fields are nil, iptables will allow all 254 // traffic. 255 DefaultIPTables func(clock tcpip.Clock, rand *rand.Rand) *IPTables 256 257 // SecureRNG is a cryptographically secure random number generator. 258 SecureRNG io.Reader 259 } 260 261 // TransportEndpointInfo holds useful information about a transport endpoint 262 // which can be queried by monitoring tools. 263 // 264 // +stateify savable 265 type TransportEndpointInfo struct { 266 // The following fields are initialized at creation time and are 267 // immutable. 
268 269 NetProto tcpip.NetworkProtocolNumber 270 TransProto tcpip.TransportProtocolNumber 271 272 // The following fields are protected by endpoint mu. 273 274 ID TransportEndpointID 275 // BindNICID and bindAddr are set via calls to Bind(). They are used to 276 // reject attempts to send data or connect via a different NIC or 277 // address 278 BindNICID tcpip.NICID 279 BindAddr tcpip.Address 280 // RegisterNICID is the default NICID registered as a side-effect of 281 // connect or datagram write. 282 RegisterNICID tcpip.NICID 283 } 284 285 // AddrNetProtoLocked unwraps the specified address if it is a V4-mapped V6 286 // address and returns the network protocol number to be used to communicate 287 // with the specified address. It returns an error if the passed address is 288 // incompatible with the receiver. 289 // 290 // Preconditon: the parent endpoint mu must be held while calling this method. 291 func (t *TransportEndpointInfo) AddrNetProtoLocked(addr tcpip.FullAddress, v6only bool) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, tcpip.Error) { 292 netProto := t.NetProto 293 switch addr.Addr.BitLen() { 294 case header.IPv4AddressSizeBits: 295 netProto = header.IPv4ProtocolNumber 296 case header.IPv6AddressSizeBits: 297 if header.IsV4MappedAddress(addr.Addr) { 298 netProto = header.IPv4ProtocolNumber 299 addr.Addr = tcpip.AddrFrom4Slice(addr.Addr.AsSlice()[header.IPv6AddressSize-header.IPv4AddressSize:]) 300 if addr.Addr == header.IPv4Any { 301 addr.Addr = tcpip.Address{} 302 } 303 } 304 } 305 306 switch t.ID.LocalAddress.BitLen() { 307 case header.IPv4AddressSizeBits: 308 if addr.Addr.BitLen() == header.IPv6AddressSizeBits { 309 return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{} 310 } 311 case header.IPv6AddressSizeBits: 312 if addr.Addr.BitLen() == header.IPv4AddressSizeBits { 313 return tcpip.FullAddress{}, 0, &tcpip.ErrNetworkUnreachable{} 314 } 315 } 316 317 switch { 318 case netProto == t.NetProto: 319 case netProto == 
header.IPv4ProtocolNumber && t.NetProto == header.IPv6ProtocolNumber: 320 if v6only { 321 return tcpip.FullAddress{}, 0, &tcpip.ErrHostUnreachable{} 322 } 323 default: 324 return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{} 325 } 326 327 return addr, netProto, nil 328 } 329 330 // IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo 331 // marker interface. 332 func (*TransportEndpointInfo) IsEndpointInfo() {} 333 334 // New allocates a new networking stack with only the requested networking and 335 // transport protocols configured with default options. 336 // 337 // Note, NDPConfigurations will be fixed before being used by the Stack. That 338 // is, if an invalid value was provided, it will be reset to the default value. 339 // 340 // Protocol options can be changed by calling the 341 // SetNetworkProtocolOption/SetTransportProtocolOption methods provided by the 342 // stack. Please refer to individual protocol implementations as to what options 343 // are supported. 344 func New(opts Options) *Stack { 345 clock := opts.Clock 346 if clock == nil { 347 clock = tcpip.NewStdClock() 348 } 349 350 if opts.UniqueID == nil { 351 opts.UniqueID = new(uniqueIDGenerator) 352 } 353 354 if opts.SecureRNG == nil { 355 opts.SecureRNG = cryptorand.Reader 356 } 357 secureRNG := cryptorand.RNGFrom(opts.SecureRNG) 358 359 randSrc := opts.RandSource 360 if randSrc == nil { 361 var v int64 362 if err := binary.Read(opts.SecureRNG, binary.LittleEndian, &v); err != nil { 363 panic(err) 364 } 365 // Source provided by rand.NewSource is not thread-safe so 366 // we wrap it in a simple thread-safe version. 
367 randSrc = &lockedRandomSource{src: rand.NewSource(v)} 368 } 369 insecureRNG := rand.New(randSrc) 370 371 if opts.IPTables == nil { 372 if opts.DefaultIPTables == nil { 373 opts.DefaultIPTables = DefaultTables 374 } 375 opts.IPTables = opts.DefaultIPTables(clock, insecureRNG) 376 } 377 378 opts.NUDConfigs.resetInvalidFields() 379 380 s := &Stack{ 381 transportProtocols: make(map[tcpip.TransportProtocolNumber]*transportProtocolState), 382 networkProtocols: make(map[tcpip.NetworkProtocolNumber]NetworkProtocol), 383 nics: make(map[tcpip.NICID]*nic), 384 packetEndpointWriteSupported: opts.AllowPacketEndpointWrite, 385 defaultForwardingEnabled: make(map[tcpip.NetworkProtocolNumber]struct{}), 386 cleanupEndpoints: make(map[TransportEndpoint]struct{}), 387 PortManager: ports.NewPortManager(), 388 clock: clock, 389 stats: opts.Stats.FillIn(), 390 handleLocal: opts.HandleLocal, 391 tables: opts.IPTables, 392 icmpRateLimiter: NewICMPRateLimiter(clock), 393 seed: secureRNG.Uint32(), 394 nudConfigs: opts.NUDConfigs, 395 uniqueIDGenerator: opts.UniqueID, 396 nudDisp: opts.NUDDisp, 397 insecureRNG: insecureRNG, 398 secureRNG: secureRNG, 399 sendBufferSize: tcpip.SendBufferSizeOption{ 400 Min: MinBufferSize, 401 Default: DefaultBufferSize, 402 Max: DefaultMaxBufferSize, 403 }, 404 receiveBufferSize: tcpip.ReceiveBufferSizeOption{ 405 Min: MinBufferSize, 406 Default: DefaultBufferSize, 407 Max: DefaultMaxBufferSize, 408 }, 409 tcpInvalidRateLimit: defaultTCPInvalidRateLimit, 410 tsOffsetSecret: secureRNG.Uint32(), 411 } 412 413 // Add specified network protocols. 414 for _, netProtoFactory := range opts.NetworkProtocols { 415 netProto := netProtoFactory(s) 416 s.networkProtocols[netProto.Number()] = netProto 417 } 418 419 // Add specified transport protocols. 
420 for _, transProtoFactory := range opts.TransportProtocols { 421 transProto := transProtoFactory(s) 422 s.transportProtocols[transProto.Number()] = &transportProtocolState{ 423 proto: transProto, 424 } 425 } 426 427 // Add the factory for raw endpoints, if present. 428 s.rawFactory = opts.RawFactory 429 430 // Create the global transport demuxer. 431 s.demux = newTransportDemuxer(s) 432 433 return s 434 } 435 436 // UniqueID returns a unique identifier. 437 func (s *Stack) UniqueID() uint64 { 438 return s.uniqueIDGenerator.UniqueID() 439 } 440 441 // SetNetworkProtocolOption allows configuring individual protocol level 442 // options. This method returns an error if the protocol is not supported or 443 // option is not supported by the protocol implementation or the provided value 444 // is incorrect. 445 func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.SettableNetworkProtocolOption) tcpip.Error { 446 netProto, ok := s.networkProtocols[network] 447 if !ok { 448 return &tcpip.ErrUnknownProtocol{} 449 } 450 return netProto.SetOption(option) 451 } 452 453 // NetworkProtocolOption allows retrieving individual protocol level option 454 // values. This method returns an error if the protocol is not supported or 455 // option is not supported by the protocol implementation. E.g.: 456 // 457 // var v ipv4.MyOption 458 // err := s.NetworkProtocolOption(tcpip.IPv4ProtocolNumber, &v) 459 // if err != nil { 460 // ... 461 // } 462 func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.GettableNetworkProtocolOption) tcpip.Error { 463 netProto, ok := s.networkProtocols[network] 464 if !ok { 465 return &tcpip.ErrUnknownProtocol{} 466 } 467 return netProto.Option(option) 468 } 469 470 // SetTransportProtocolOption allows configuring individual protocol level 471 // options. 
This method returns an error if the protocol is not supported or 472 // option is not supported by the protocol implementation or the provided value 473 // is incorrect. 474 func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.SettableTransportProtocolOption) tcpip.Error { 475 transProtoState, ok := s.transportProtocols[transport] 476 if !ok { 477 return &tcpip.ErrUnknownProtocol{} 478 } 479 return transProtoState.proto.SetOption(option) 480 } 481 482 // TransportProtocolOption allows retrieving individual protocol level option 483 // values. This method returns an error if the protocol is not supported or 484 // option is not supported by the protocol implementation. 485 // 486 // var v tcp.SACKEnabled 487 // if err := s.TransportProtocolOption(tcpip.TCPProtocolNumber, &v); err != nil { 488 // ... 489 // } 490 func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.GettableTransportProtocolOption) tcpip.Error { 491 transProtoState, ok := s.transportProtocols[transport] 492 if !ok { 493 return &tcpip.ErrUnknownProtocol{} 494 } 495 return transProtoState.proto.Option(option) 496 } 497 498 // SendBufSizeProto is a protocol that can return its send buffer size. 499 type SendBufSizeProto interface { 500 SendBufferSize() tcpip.TCPSendBufferSizeRangeOption 501 } 502 503 // TCPSendBufferLimits returns the TCP send buffer size limit. 504 func (s *Stack) TCPSendBufferLimits() tcpip.TCPSendBufferSizeRangeOption { 505 return s.transportProtocols[header.TCPProtocolNumber].proto.(SendBufSizeProto).SendBufferSize() 506 } 507 508 // SetTransportProtocolHandler sets the per-stack default handler for the given 509 // protocol. 510 // 511 // It must be called only during initialization of the stack. Changing it as the 512 // stack is operating is not supported. 
513 func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(TransportEndpointID, *PacketBuffer) bool) { 514 state := s.transportProtocols[p] 515 if state != nil { 516 state.defaultHandler = h 517 } 518 } 519 520 // Clock returns the Stack's clock for retrieving the current time and 521 // scheduling work. 522 func (s *Stack) Clock() tcpip.Clock { 523 return s.clock 524 } 525 526 // Stats returns a mutable copy of the current stats. 527 // 528 // This is not generally exported via the public interface, but is available 529 // internally. 530 func (s *Stack) Stats() tcpip.Stats { 531 return s.stats 532 } 533 534 // SetNICForwarding enables or disables packet forwarding on the specified NIC 535 // for the passed protocol. 536 // 537 // Returns the previous configuration on the NIC. 538 func (s *Stack) SetNICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) { 539 s.mu.RLock() 540 defer s.mu.RUnlock() 541 542 nic, ok := s.nics[id] 543 if !ok { 544 return false, &tcpip.ErrUnknownNICID{} 545 } 546 547 return nic.setForwarding(protocol, enable) 548 } 549 550 // NICForwarding returns the forwarding configuration for the specified NIC. 551 func (s *Stack) NICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) { 552 s.mu.RLock() 553 defer s.mu.RUnlock() 554 555 nic, ok := s.nics[id] 556 if !ok { 557 return false, &tcpip.ErrUnknownNICID{} 558 } 559 560 return nic.forwarding(protocol) 561 } 562 563 // SetForwardingDefaultAndAllNICs sets packet forwarding for all NICs for the 564 // passed protocol and sets the default setting for newly created NICs. 
565 func (s *Stack) SetForwardingDefaultAndAllNICs(protocol tcpip.NetworkProtocolNumber, enable bool) tcpip.Error { 566 s.mu.Lock() 567 defer s.mu.Unlock() 568 569 doneOnce := false 570 for id, nic := range s.nics { 571 if _, err := nic.setForwarding(protocol, enable); err != nil { 572 // Expect forwarding to be settable on all interfaces if it was set on 573 // one. 574 if doneOnce { 575 panic(fmt.Sprintf("nic(id=%d).setForwarding(%d, %t): %s", id, protocol, enable, err)) 576 } 577 578 return err 579 } 580 581 doneOnce = true 582 } 583 584 if enable { 585 s.defaultForwardingEnabled[protocol] = struct{}{} 586 } else { 587 delete(s.defaultForwardingEnabled, protocol) 588 } 589 590 return nil 591 } 592 593 // AddMulticastRoute adds a multicast route to be used for the specified 594 // addresses and protocol. 595 func (s *Stack) AddMulticastRoute(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination, route MulticastRoute) tcpip.Error { 596 netProto, ok := s.networkProtocols[protocol] 597 if !ok { 598 return &tcpip.ErrUnknownProtocol{} 599 } 600 601 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 602 if !ok { 603 return &tcpip.ErrNotSupported{} 604 } 605 606 return forwardingNetProto.AddMulticastRoute(addresses, route) 607 } 608 609 // RemoveMulticastRoute removes a multicast route that matches the specified 610 // addresses and protocol. 
611 func (s *Stack) RemoveMulticastRoute(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination) tcpip.Error { 612 netProto, ok := s.networkProtocols[protocol] 613 if !ok { 614 return &tcpip.ErrUnknownProtocol{} 615 } 616 617 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 618 if !ok { 619 return &tcpip.ErrNotSupported{} 620 } 621 622 return forwardingNetProto.RemoveMulticastRoute(addresses) 623 } 624 625 // MulticastRouteLastUsedTime returns a monotonic timestamp that represents the 626 // last time that the route that matches the provided addresses and protocol 627 // was used or updated. 628 func (s *Stack) MulticastRouteLastUsedTime(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination) (tcpip.MonotonicTime, tcpip.Error) { 629 netProto, ok := s.networkProtocols[protocol] 630 if !ok { 631 return tcpip.MonotonicTime{}, &tcpip.ErrUnknownProtocol{} 632 } 633 634 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 635 if !ok { 636 return tcpip.MonotonicTime{}, &tcpip.ErrNotSupported{} 637 } 638 639 return forwardingNetProto.MulticastRouteLastUsedTime(addresses) 640 } 641 642 // EnableMulticastForwardingForProtocol enables multicast forwarding for the 643 // provided protocol. 644 // 645 // Returns true if forwarding was already enabled on the protocol. 646 // Additionally, returns an error if: 647 // 648 // - The protocol is not found. 649 // - The protocol doesn't support multicast forwarding. 650 // - The multicast forwarding event dispatcher is nil. 651 // 652 // If successful, future multicast forwarding events will be sent to the 653 // provided event dispatcher. 
654 func (s *Stack) EnableMulticastForwardingForProtocol(protocol tcpip.NetworkProtocolNumber, disp MulticastForwardingEventDispatcher) (bool, tcpip.Error) { 655 netProto, ok := s.networkProtocols[protocol] 656 if !ok { 657 return false, &tcpip.ErrUnknownProtocol{} 658 } 659 660 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 661 if !ok { 662 return false, &tcpip.ErrNotSupported{} 663 } 664 665 return forwardingNetProto.EnableMulticastForwarding(disp) 666 } 667 668 // DisableMulticastForwardingForProtocol disables multicast forwarding for the 669 // provided protocol. 670 // 671 // Returns an error if the provided protocol is not found or if it does not 672 // support multicast forwarding. 673 func (s *Stack) DisableMulticastForwardingForProtocol(protocol tcpip.NetworkProtocolNumber) tcpip.Error { 674 netProto, ok := s.networkProtocols[protocol] 675 if !ok { 676 return &tcpip.ErrUnknownProtocol{} 677 } 678 679 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 680 if !ok { 681 return &tcpip.ErrNotSupported{} 682 } 683 684 forwardingNetProto.DisableMulticastForwarding() 685 return nil 686 } 687 688 // SetNICMulticastForwarding enables or disables multicast packet forwarding on 689 // the specified NIC for the passed protocol. 690 // 691 // Returns the previous configuration on the NIC. 692 // 693 // TODO(https://gvisor.dev/issue/7338): Implement support for multicast 694 // forwarding. Currently, setting this value is a no-op and is not ready for 695 // use. 696 func (s *Stack) SetNICMulticastForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) { 697 s.mu.RLock() 698 defer s.mu.RUnlock() 699 700 nic, ok := s.nics[id] 701 if !ok { 702 return false, &tcpip.ErrUnknownNICID{} 703 } 704 705 return nic.setMulticastForwarding(protocol, enable) 706 } 707 708 // NICMulticastForwarding returns the multicast forwarding configuration for 709 // the specified NIC. 
710 func (s *Stack) NICMulticastForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) { 711 s.mu.RLock() 712 defer s.mu.RUnlock() 713 714 nic, ok := s.nics[id] 715 if !ok { 716 return false, &tcpip.ErrUnknownNICID{} 717 } 718 719 return nic.multicastForwarding(protocol) 720 } 721 722 // PortRange returns the UDP and TCP inclusive range of ephemeral ports used in 723 // both IPv4 and IPv6. 724 func (s *Stack) PortRange() (uint16, uint16) { 725 return s.PortManager.PortRange() 726 } 727 728 // SetPortRange sets the UDP and TCP IPv4 and IPv6 ephemeral port range 729 // (inclusive). 730 func (s *Stack) SetPortRange(start uint16, end uint16) tcpip.Error { 731 return s.PortManager.SetPortRange(start, end) 732 } 733 734 // SetRouteTable assigns the route table to be used by this stack. It 735 // specifies which NIC to use for given destination address ranges. 736 // 737 // This method takes ownership of the table. 738 func (s *Stack) SetRouteTable(table []tcpip.Route) { 739 s.routeMu.Lock() 740 defer s.routeMu.Unlock() 741 s.routeTable = table 742 } 743 744 // GetRouteTable returns the route table which is currently in use. 745 func (s *Stack) GetRouteTable() []tcpip.Route { 746 s.routeMu.RLock() 747 defer s.routeMu.RUnlock() 748 return append([]tcpip.Route(nil), s.routeTable...) 749 } 750 751 // AddRoute appends a route to the route table. 752 func (s *Stack) AddRoute(route tcpip.Route) { 753 s.routeMu.Lock() 754 defer s.routeMu.Unlock() 755 s.routeTable = append(s.routeTable, route) 756 } 757 758 // RemoveRoutes removes matching routes from the route table. 
759 func (s *Stack) RemoveRoutes(match func(tcpip.Route) bool) { 760 s.routeMu.Lock() 761 defer s.routeMu.Unlock() 762 763 var filteredRoutes []tcpip.Route 764 for _, route := range s.routeTable { 765 if !match(route) { 766 filteredRoutes = append(filteredRoutes, route) 767 } 768 } 769 s.routeTable = filteredRoutes 770 } 771 772 // NewEndpoint creates a new transport layer endpoint of the given protocol. 773 func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { 774 t, ok := s.transportProtocols[transport] 775 if !ok { 776 return nil, &tcpip.ErrUnknownProtocol{} 777 } 778 779 return t.proto.NewEndpoint(network, waiterQueue) 780 } 781 782 // NewRawEndpoint creates a new raw transport layer endpoint of the given 783 // protocol. Raw endpoints receive all traffic for a given protocol regardless 784 // of address. 785 func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, tcpip.Error) { 786 if s.rawFactory == nil { 787 netRawMissingLogger.Infof("A process tried to create a raw socket, but --net-raw was not specified. Should runsc be run with --net-raw?") 788 return nil, &tcpip.ErrNotPermitted{} 789 } 790 791 if !associated { 792 return s.rawFactory.NewUnassociatedEndpoint(s, network, transport, waiterQueue) 793 } 794 795 t, ok := s.transportProtocols[transport] 796 if !ok { 797 return nil, &tcpip.ErrUnknownProtocol{} 798 } 799 800 return t.proto.NewRawEndpoint(network, waiterQueue) 801 } 802 803 // NewPacketEndpoint creates a new packet endpoint listening for the given 804 // netProto. 
805 func (s *Stack) NewPacketEndpoint(cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { 806 if s.rawFactory == nil { 807 return nil, &tcpip.ErrNotPermitted{} 808 } 809 810 return s.rawFactory.NewPacketEndpoint(s, cooked, netProto, waiterQueue) 811 } 812 813 // NICContext is an opaque pointer used to store client-supplied NIC metadata. 814 type NICContext any 815 816 // NICOptions specifies the configuration of a NIC as it is being created. 817 // The zero value creates an enabled, unnamed NIC. 818 type NICOptions struct { 819 // Name specifies the name of the NIC. 820 Name string 821 822 // Disabled specifies whether to avoid calling Attach on the passed 823 // LinkEndpoint. 824 Disabled bool 825 826 // Context specifies user-defined data that will be returned in stack.NICInfo 827 // for the NIC. Clients of this library can use it to add metadata that 828 // should be tracked alongside a NIC, to avoid having to keep a 829 // map[tcpip.NICID]metadata mirroring stack.Stack's nic map. 830 Context NICContext 831 832 // QDisc is the queue discipline to use for this NIC. 833 QDisc QueueingDiscipline 834 835 // DeliverLinkPackets specifies whether the NIC is responsible for 836 // delivering raw packets to packet sockets. 837 DeliverLinkPackets bool 838 } 839 840 // CreateNICWithOptions creates a NIC with the provided id, LinkEndpoint, and 841 // NICOptions. See the documentation on type NICOptions for details on how 842 // NICs can be configured. 843 // 844 // LinkEndpoint.Attach will be called to bind ep with a NetworkDispatcher. 845 func (s *Stack) CreateNICWithOptions(id tcpip.NICID, ep LinkEndpoint, opts NICOptions) tcpip.Error { 846 s.mu.Lock() 847 defer s.mu.Unlock() 848 849 // Make sure id is unique. 850 if _, ok := s.nics[id]; ok { 851 return &tcpip.ErrDuplicateNICID{} 852 } 853 854 // Make sure name is unique, unless unnamed. 
855 if opts.Name != "" { 856 for _, n := range s.nics { 857 if n.Name() == opts.Name { 858 return &tcpip.ErrDuplicateNICID{} 859 } 860 } 861 } 862 863 n := newNIC(s, id, ep, opts) 864 for proto := range s.defaultForwardingEnabled { 865 if _, err := n.setForwarding(proto, true); err != nil { 866 panic(fmt.Sprintf("newNIC(%d, ...).setForwarding(%d, true): %s", id, proto, err)) 867 } 868 } 869 s.nics[id] = n 870 if !opts.Disabled { 871 return n.enable() 872 } 873 874 return nil 875 } 876 877 // CreateNIC creates a NIC with the provided id and LinkEndpoint and calls 878 // LinkEndpoint.Attach to bind ep with a NetworkDispatcher. 879 func (s *Stack) CreateNIC(id tcpip.NICID, ep LinkEndpoint) tcpip.Error { 880 return s.CreateNICWithOptions(id, ep, NICOptions{}) 881 } 882 883 // GetLinkEndpointByName gets the link endpoint specified by name. 884 func (s *Stack) GetLinkEndpointByName(name string) LinkEndpoint { 885 s.mu.RLock() 886 defer s.mu.RUnlock() 887 for _, nic := range s.nics { 888 if nic.Name() == name { 889 linkEP, ok := nic.NetworkLinkEndpoint.(LinkEndpoint) 890 if !ok { 891 panic(fmt.Sprintf("unexpected NetworkLinkEndpoint(%#v) is not a LinkEndpoint", nic.NetworkLinkEndpoint)) 892 } 893 return linkEP 894 } 895 } 896 return nil 897 } 898 899 // EnableNIC enables the given NIC so that the link-layer endpoint can start 900 // delivering packets to it. 901 func (s *Stack) EnableNIC(id tcpip.NICID) tcpip.Error { 902 s.mu.RLock() 903 defer s.mu.RUnlock() 904 905 nic, ok := s.nics[id] 906 if !ok { 907 return &tcpip.ErrUnknownNICID{} 908 } 909 910 return nic.enable() 911 } 912 913 // DisableNIC disables the given NIC. 914 func (s *Stack) DisableNIC(id tcpip.NICID) tcpip.Error { 915 s.mu.RLock() 916 defer s.mu.RUnlock() 917 918 nic, ok := s.nics[id] 919 if !ok { 920 return &tcpip.ErrUnknownNICID{} 921 } 922 923 nic.disable() 924 return nil 925 } 926 927 // CheckNIC checks if a NIC is usable. 
928 func (s *Stack) CheckNIC(id tcpip.NICID) bool { 929 s.mu.RLock() 930 defer s.mu.RUnlock() 931 932 nic, ok := s.nics[id] 933 if !ok { 934 return false 935 } 936 937 return nic.Enabled() 938 } 939 940 // RemoveNIC removes NIC and all related routes from the network stack. 941 func (s *Stack) RemoveNIC(id tcpip.NICID) tcpip.Error { 942 s.mu.Lock() 943 defer s.mu.Unlock() 944 945 return s.removeNICLocked(id) 946 } 947 948 // removeNICLocked removes NIC and all related routes from the network stack. 949 // 950 // +checklocks:s.mu 951 func (s *Stack) removeNICLocked(id tcpip.NICID) tcpip.Error { 952 nic, ok := s.nics[id] 953 if !ok { 954 return &tcpip.ErrUnknownNICID{} 955 } 956 delete(s.nics, id) 957 958 // Remove routes in-place. n tracks the number of routes written. 959 s.routeMu.Lock() 960 n := 0 961 for _, r := range s.routeTable { 962 if r.NIC != id { 963 // Keep this route. 964 s.routeTable[n] = r 965 n++ 966 } 967 } 968 clear(s.routeTable[n:]) 969 s.routeTable = s.routeTable[:n] 970 s.routeMu.Unlock() 971 972 return nic.remove() 973 } 974 975 // NICInfo captures the name and addresses assigned to a NIC. 976 type NICInfo struct { 977 Name string 978 LinkAddress tcpip.LinkAddress 979 ProtocolAddresses []tcpip.ProtocolAddress 980 981 // Flags indicate the state of the NIC. 982 Flags NICStateFlags 983 984 // MTU is the maximum transmission unit. 985 MTU uint32 986 987 Stats tcpip.NICStats 988 989 // NetworkStats holds the stats of each NetworkEndpoint bound to the NIC. 990 NetworkStats map[tcpip.NetworkProtocolNumber]NetworkEndpointStats 991 992 // Context is user-supplied data optionally supplied in CreateNICWithOptions. 993 // See type NICOptions for more details. 994 Context NICContext 995 996 // ARPHardwareType holds the ARP Hardware type of the NIC. This is the 997 // value sent in haType field of an ARP Request sent by this NIC and the 998 // value expected in the haType field of an ARP response. 
999 ARPHardwareType header.ARPHardwareType 1000 1001 // Forwarding holds the forwarding status for each network endpoint that 1002 // supports forwarding. 1003 Forwarding map[tcpip.NetworkProtocolNumber]bool 1004 1005 // MulticastForwarding holds the forwarding status for each network endpoint 1006 // that supports multicast forwarding. 1007 MulticastForwarding map[tcpip.NetworkProtocolNumber]bool 1008 } 1009 1010 // HasNIC returns true if the NICID is defined in the stack. 1011 func (s *Stack) HasNIC(id tcpip.NICID) bool { 1012 s.mu.RLock() 1013 _, ok := s.nics[id] 1014 s.mu.RUnlock() 1015 return ok 1016 } 1017 1018 // NICInfo returns a map of NICIDs to their associated information. 1019 func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo { 1020 s.mu.RLock() 1021 defer s.mu.RUnlock() 1022 1023 type forwardingFn func(tcpip.NetworkProtocolNumber) (bool, tcpip.Error) 1024 forwardingValue := func(forwardingFn forwardingFn, proto tcpip.NetworkProtocolNumber, nicID tcpip.NICID, fnName string) (forward bool, ok bool) { 1025 switch forwarding, err := forwardingFn(proto); err.(type) { 1026 case nil: 1027 return forwarding, true 1028 case *tcpip.ErrUnknownProtocol: 1029 panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nicID)) 1030 case *tcpip.ErrNotSupported: 1031 // Not all network protocols support forwarding. 1032 default: 1033 panic(fmt.Sprintf("nic(id=%d).%s(%d): %s", nicID, fnName, proto, err)) 1034 } 1035 return false, false 1036 } 1037 1038 nics := make(map[tcpip.NICID]NICInfo) 1039 for id, nic := range s.nics { 1040 flags := NICStateFlags{ 1041 Up: true, // Netstack interfaces are always up. 
1042 Running: nic.Enabled(), 1043 Promiscuous: nic.Promiscuous(), 1044 Loopback: nic.IsLoopback(), 1045 } 1046 1047 netStats := make(map[tcpip.NetworkProtocolNumber]NetworkEndpointStats) 1048 for proto, netEP := range nic.networkEndpoints { 1049 netStats[proto] = netEP.Stats() 1050 } 1051 1052 info := NICInfo{ 1053 Name: nic.name, 1054 LinkAddress: nic.NetworkLinkEndpoint.LinkAddress(), 1055 ProtocolAddresses: nic.primaryAddresses(), 1056 Flags: flags, 1057 MTU: nic.NetworkLinkEndpoint.MTU(), 1058 Stats: nic.stats.local, 1059 NetworkStats: netStats, 1060 Context: nic.context, 1061 ARPHardwareType: nic.NetworkLinkEndpoint.ARPHardwareType(), 1062 Forwarding: make(map[tcpip.NetworkProtocolNumber]bool), 1063 MulticastForwarding: make(map[tcpip.NetworkProtocolNumber]bool), 1064 } 1065 1066 for proto := range s.networkProtocols { 1067 if forwarding, ok := forwardingValue(nic.forwarding, proto, id, "forwarding"); ok { 1068 info.Forwarding[proto] = forwarding 1069 } 1070 1071 if multicastForwarding, ok := forwardingValue(nic.multicastForwarding, proto, id, "multicastForwarding"); ok { 1072 info.MulticastForwarding[proto] = multicastForwarding 1073 } 1074 } 1075 1076 nics[id] = info 1077 } 1078 return nics 1079 } 1080 1081 // NICStateFlags holds information about the state of an NIC. 1082 type NICStateFlags struct { 1083 // Up indicates whether the interface is running. 1084 Up bool 1085 1086 // Running indicates whether resources are allocated. 1087 Running bool 1088 1089 // Promiscuous indicates whether the interface is in promiscuous mode. 1090 Promiscuous bool 1091 1092 // Loopback indicates whether the interface is a loopback. 1093 Loopback bool 1094 } 1095 1096 // AddProtocolAddress adds an address to the specified NIC, possibly with extra 1097 // properties. 
1098 func (s *Stack) AddProtocolAddress(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress, properties AddressProperties) tcpip.Error { 1099 s.mu.RLock() 1100 defer s.mu.RUnlock() 1101 1102 nic, ok := s.nics[id] 1103 if !ok { 1104 return &tcpip.ErrUnknownNICID{} 1105 } 1106 1107 return nic.addAddress(protocolAddress, properties) 1108 } 1109 1110 // RemoveAddress removes an existing network-layer address from the specified 1111 // NIC. 1112 func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) tcpip.Error { 1113 s.mu.RLock() 1114 defer s.mu.RUnlock() 1115 1116 if nic, ok := s.nics[id]; ok { 1117 return nic.removeAddress(addr) 1118 } 1119 1120 return &tcpip.ErrUnknownNICID{} 1121 } 1122 1123 // SetAddressLifetimes sets informational preferred and valid lifetimes, and 1124 // whether the address should be preferred or deprecated. 1125 func (s *Stack) SetAddressLifetimes(id tcpip.NICID, addr tcpip.Address, lifetimes AddressLifetimes) tcpip.Error { 1126 s.mu.RLock() 1127 defer s.mu.RUnlock() 1128 1129 if nic, ok := s.nics[id]; ok { 1130 return nic.setAddressLifetimes(addr, lifetimes) 1131 } 1132 1133 return &tcpip.ErrUnknownNICID{} 1134 } 1135 1136 // AllAddresses returns a map of NICIDs to their protocol addresses (primary 1137 // and non-primary). 1138 func (s *Stack) AllAddresses() map[tcpip.NICID][]tcpip.ProtocolAddress { 1139 s.mu.RLock() 1140 defer s.mu.RUnlock() 1141 1142 nics := make(map[tcpip.NICID][]tcpip.ProtocolAddress) 1143 for id, nic := range s.nics { 1144 nics[id] = nic.allPermanentAddresses() 1145 } 1146 return nics 1147 } 1148 1149 // GetMainNICAddress returns the first non-deprecated primary address and prefix 1150 // for the given NIC and protocol. If no non-deprecated primary addresses exist, 1151 // a deprecated address will be returned. If no deprecated addresses exist, the 1152 // zero value will be returned. 
1153 func (s *Stack) GetMainNICAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error) { 1154 s.mu.RLock() 1155 defer s.mu.RUnlock() 1156 1157 nic, ok := s.nics[id] 1158 if !ok { 1159 return tcpip.AddressWithPrefix{}, &tcpip.ErrUnknownNICID{} 1160 } 1161 1162 return nic.PrimaryAddress(protocol) 1163 } 1164 1165 func (s *Stack) getAddressEP(nic *nic, localAddr, remoteAddr, srcHint tcpip.Address, netProto tcpip.NetworkProtocolNumber) AssignableAddressEndpoint { 1166 if localAddr.BitLen() == 0 { 1167 return nic.primaryEndpoint(netProto, remoteAddr, srcHint) 1168 } 1169 return nic.findEndpoint(netProto, localAddr, CanBePrimaryEndpoint) 1170 } 1171 1172 // NewRouteForMulticast returns a Route that may be used to forward multicast 1173 // packets. 1174 // 1175 // Returns nil if validation fails. 1176 func (s *Stack) NewRouteForMulticast(nicID tcpip.NICID, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { 1177 s.mu.RLock() 1178 defer s.mu.RUnlock() 1179 1180 nic, ok := s.nics[nicID] 1181 if !ok || !nic.Enabled() { 1182 return nil 1183 } 1184 1185 if addressEndpoint := s.getAddressEP(nic, tcpip.Address{} /* localAddr */, remoteAddr, tcpip.Address{} /* srcHint */, netProto); addressEndpoint != nil { 1186 return constructAndValidateRoute(netProto, addressEndpoint, nic, nic, tcpip.Address{} /* gateway */, tcpip.Address{} /* localAddr */, remoteAddr, s.handleLocal, false /* multicastLoop */) 1187 } 1188 return nil 1189 } 1190 1191 // findLocalRouteFromNICRLocked is like findLocalRouteRLocked but finds a route 1192 // from the specified NIC. 
1193 // 1194 // +checklocksread:s.mu 1195 func (s *Stack) findLocalRouteFromNICRLocked(localAddressNIC *nic, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { 1196 localAddressEndpoint := localAddressNIC.getAddressOrCreateTempInner(netProto, localAddr, false /* createTemp */, NeverPrimaryEndpoint) 1197 if localAddressEndpoint == nil { 1198 return nil 1199 } 1200 1201 var outgoingNIC *nic 1202 // Prefer a local route to the same interface as the local address. 1203 if localAddressNIC.hasAddress(netProto, remoteAddr) { 1204 outgoingNIC = localAddressNIC 1205 } 1206 1207 // If the remote address isn't owned by the local address's NIC, check all 1208 // NICs. 1209 if outgoingNIC == nil { 1210 for _, nic := range s.nics { 1211 if nic.hasAddress(netProto, remoteAddr) { 1212 outgoingNIC = nic 1213 break 1214 } 1215 } 1216 } 1217 1218 // If the remote address is not owned by the stack, we can't return a local 1219 // route. 1220 if outgoingNIC == nil { 1221 localAddressEndpoint.DecRef() 1222 return nil 1223 } 1224 1225 r := makeLocalRoute( 1226 netProto, 1227 localAddr, 1228 remoteAddr, 1229 outgoingNIC, 1230 localAddressNIC, 1231 localAddressEndpoint, 1232 ) 1233 1234 if r.IsOutboundBroadcast() { 1235 r.Release() 1236 return nil 1237 } 1238 1239 return r 1240 } 1241 1242 // findLocalRouteRLocked returns a local route. 1243 // 1244 // A local route is a route to some remote address which the stack owns. That 1245 // is, a local route is a route where packets never have to leave the stack. 
1246 // 1247 // +checklocksread:s.mu 1248 func (s *Stack) findLocalRouteRLocked(localAddressNICID tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { 1249 if localAddr.BitLen() == 0 { 1250 localAddr = remoteAddr 1251 } 1252 1253 if localAddressNICID == 0 { 1254 for _, localAddressNIC := range s.nics { 1255 if r := s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto); r != nil { 1256 return r 1257 } 1258 } 1259 1260 return nil 1261 } 1262 1263 if localAddressNIC, ok := s.nics[localAddressNICID]; ok { 1264 return s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto) 1265 } 1266 1267 return nil 1268 } 1269 1270 // HandleLocal returns true if non-loopback interfaces are allowed to loop packets. 1271 func (s *Stack) HandleLocal() bool { 1272 return s.handleLocal 1273 } 1274 1275 func isNICForwarding(nic *nic, proto tcpip.NetworkProtocolNumber) bool { 1276 switch forwarding, err := nic.forwarding(proto); err.(type) { 1277 case nil: 1278 return forwarding 1279 case *tcpip.ErrUnknownProtocol: 1280 panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nic.ID())) 1281 case *tcpip.ErrNotSupported: 1282 // Not all network protocols support forwarding. 1283 return false 1284 default: 1285 panic(fmt.Sprintf("nic(id=%d).forwarding(%d): %s", nic.ID(), proto, err)) 1286 } 1287 } 1288 1289 // findRouteWithLocalAddrFromAnyInterfaceRLocked returns a route to the given 1290 // destination address, leaving through the given NIC. 1291 // 1292 // Rather than preferring to find a route that uses a local address assigned to 1293 // the outgoing interface, it finds any NIC that holds a matching local address 1294 // endpoint. 
//
// +checklocksread:s.mu
func (s *Stack) findRouteWithLocalAddrFromAnyInterfaceRLocked(outgoingNIC *nic, localAddr, remoteAddr, srcHint, gateway tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) *Route {
	// Try every NIC as the owner of the local address; the first one that
	// yields a validated route wins. Map iteration order is unspecified, so
	// which NIC is tried first is not deterministic.
	for _, aNIC := range s.nics {
		addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, srcHint, netProto)
		if addressEndpoint == nil {
			continue
		}

		if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, outgoingNIC, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
			return r
		}
	}
	return nil
}

// FindRoute creates a route to the given destination address, leaving through
// the given NIC and local address (if provided).
//
// If a NIC is not specified, the returned route will leave through the same
// NIC as the NIC that has the local address assigned when forwarding is
// disabled. If forwarding is enabled and the NIC is unspecified, the route may
// leave through any interface unless the route is link-local.
//
// If no local address is provided, the stack will select a local address. If no
// remote address is provided, the stack will use a remote address equal to the
// local address.
//
// Errors returned by the visible code paths:
//   - ErrUnknownProtocol when netProto is not registered with the stack.
//   - ErrBadLocalAddress when no route is found for a loopback destination
//     (see the individual return sites for which loopback checks apply).
//   - ErrHostUnreachable or ErrNetworkUnreachable when no usable route exists.
func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) (*Route, tcpip.Error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// Reject attempts to use unsupported protocols.
	if !s.CheckNetworkProtocol(netProto) {
		return nil, &tcpip.ErrUnknownProtocol{}
	}

	// Classify the destination. Broadcast, multicast, link-local and loopback
	// destinations do not need a gateway (needRoute is false for them).
	isLinkLocal := header.IsV6LinkLocalUnicastAddress(remoteAddr) || header.IsV6LinkLocalMulticastAddress(remoteAddr)
	isLocalBroadcast := remoteAddr == header.IPv4Broadcast
	isMulticast := header.IsV4MulticastAddress(remoteAddr) || header.IsV6MulticastAddress(remoteAddr)
	isLoopback := header.IsV4LoopbackAddress(remoteAddr) || header.IsV6LoopbackAddress(remoteAddr)
	needRoute := !(isLocalBroadcast || isMulticast || isLinkLocal || isLoopback)

	// When local handling is enabled, a route that never leaves the stack is
	// preferred for unicast destinations.
	if s.handleLocal && !isMulticast && !isLocalBroadcast {
		if r := s.findLocalRouteRLocked(id, localAddr, remoteAddr, netProto); r != nil {
			return r, nil
		}
	}

	// If the interface is specified and we do not need a route, return a route
	// through the interface if the interface is valid and enabled.
	if id != 0 && !needRoute {
		if nic, ok := s.nics[id]; ok && nic.Enabled() {
			if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, tcpip.Address{} /* srcHint */, netProto); addressEndpoint != nil {
				return makeRoute(
					netProto,
					tcpip.Address{}, /* gateway */
					localAddr,
					remoteAddr,
					nic, /* outboundNIC */
					nic, /* localAddressNIC*/
					addressEndpoint,
					s.handleLocal,
					multicastLoop,
				), nil
			}
		}

		if isLoopback {
			return nil, &tcpip.ErrBadLocalAddress{}
		}
		return nil, &tcpip.ErrNetworkUnreachable{}
	}

	// Forwarding candidates are only recorded when neither the local nor the
	// remote address is link-local.
	onlyGlobalAddresses := !header.IsV6LinkLocalUnicastAddress(localAddr) && !isLinkLocal

	// Find a route to the remote with the route table.
	//
	// The closure scopes the route-table read lock to the scan itself. It
	// returns a finished route when one is found directly, and otherwise may
	// leave a forwarding candidate in chosenRoute for the code that follows.
	var chosenRoute tcpip.Route
	if r := func() *Route {
		s.routeMu.RLock()
		defer s.routeMu.RUnlock()

		for _, route := range s.routeTable {
			// An unset remote address matches every route table entry.
			if remoteAddr.BitLen() != 0 && !route.Destination.Contains(remoteAddr) {
				continue
			}

			// Skip entries whose NIC no longer exists or is disabled.
			nic, ok := s.nics[route.NIC]
			if !ok || !nic.Enabled() {
				continue
			}

			if id == 0 || id == route.NIC {
				if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, route.SourceHint, netProto); addressEndpoint != nil {
					var gateway tcpip.Address
					if needRoute {
						gateway = route.Gateway
					}
					// The same NIC both owns the local address and sends the packet.
					r := constructAndValidateRoute(netProto, addressEndpoint, nic /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop)
					if r == nil {
						panic(fmt.Sprintf("non-forwarding route validation failed with route table entry = %#v, id = %d, localAddr = %s, remoteAddr = %s", route, id, localAddr, remoteAddr))
					}
					return r
				}
			}

			// If the stack has forwarding enabled, we haven't found a valid route to
			// the remote address yet, and we are routing locally generated traffic,
			// keep track of the first valid route. We keep iterating because we
			// prefer routes that let us use a local address that is assigned to the
			// outgoing interface. There is no requirement to do this from any RFC
			// but simply a choice made to better follow a strong host model which
			// the netstack follows at the time of writing.
			//
			// Note that for incoming traffic that we are forwarding (for which the
			// NIC and local address are unspecified), we do not keep iterating, as
			// there is no reason to prefer routes that let us use a local address
			// when routing forwarded (as opposed to locally-generated) traffic.
			locallyGenerated := (id != 0 || localAddr != tcpip.Address{})
			if onlyGlobalAddresses && chosenRoute.Equal(tcpip.Route{}) && isNICForwarding(nic, netProto) {
				if locallyGenerated {
					chosenRoute = route
					continue
				}
				if r := s.findRouteWithLocalAddrFromAnyInterfaceRLocked(nic, localAddr, remoteAddr, route.SourceHint, route.Gateway, netProto, multicastLoop); r != nil {
					return r
				}
			}
		}

		return nil
	}(); r != nil {
		return r, nil
	}

	if !chosenRoute.Equal(tcpip.Route{}) {
		// At this point we know the stack has forwarding enabled since chosenRoute is
		// only set when forwarding is enabled.
		nic, ok := s.nics[chosenRoute.NIC]
		if !ok {
			// If the route's NIC was invalid, we should not have chosen the route.
			panic(fmt.Sprintf("chosen route must have a valid NIC with ID = %d", chosenRoute.NIC))
		}

		var gateway tcpip.Address
		if needRoute {
			gateway = chosenRoute.Gateway
		}

		// Use the specified NIC to get the local address endpoint.
		if id != 0 {
			if aNIC, ok := s.nics[id]; ok {
				if addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, chosenRoute.SourceHint, netProto); addressEndpoint != nil {
					if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
						return r, nil
					}
				}
			}

			// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
			return nil, &tcpip.ErrHostUnreachable{}
		}

		// NOTE(review): the id != 0 branch above always returns, so this
		// condition is always true when reached.
		if id == 0 {
			// If an interface is not specified, try to find a NIC that holds the local
			// address endpoint to construct a route.
			if r := s.findRouteWithLocalAddrFromAnyInterfaceRLocked(nic, localAddr, remoteAddr, chosenRoute.SourceHint, gateway, netProto, multicastLoop); r != nil {
				return r, nil
			}
		}
	}

	if needRoute {
		// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
		return nil, &tcpip.ErrHostUnreachable{}
	}
	// Only the IPv6 loopback address is checked here, unlike isLoopback above,
	// which also covers IPv4 loopback addresses.
	if header.IsV6LoopbackAddress(remoteAddr) {
		return nil, &tcpip.ErrBadLocalAddress{}
	}
	// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
	// NOTE(review): this return already yields ErrNetworkUnreachable, so the
	// TODO above looks stale for this site; confirm before removing it.
	return nil, &tcpip.ErrNetworkUnreachable{}
}

// CheckNetworkProtocol checks if a given network protocol is enabled in the
// stack.
//
// It reads s.networkProtocols without taking any lock.
func (s *Stack) CheckNetworkProtocol(protocol tcpip.NetworkProtocolNumber) bool {
	_, ok := s.networkProtocols[protocol]
	return ok
}

// CheckDuplicateAddress performs duplicate address detection for the address on
// the specified interface.
//
// Returns ErrUnknownNICID if nicID is not registered. The stack lock is held
// only for the NIC lookup, not while the check itself runs on the NIC.
func (s *Stack) CheckDuplicateAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, h DADCompletionHandler) (DADCheckAddressDisposition, tcpip.Error) {
	s.mu.RLock()
	nic, ok := s.nics[nicID]
	s.mu.RUnlock()

	if !ok {
		return 0, &tcpip.ErrUnknownNICID{}
	}

	return nic.checkDuplicateAddress(protocol, addr, h)
}

// CheckLocalAddress determines if the given local address exists, and if it
// does, returns the id of the NIC it's bound to. Returns 0 if the address
// does not exist.
//
// When nicID is non-zero only that NIC is consulted, and for IPv4 the NIC's id
// is returned without checking the address at all. When nicID is zero, the id
// of the first NIC found to hold the address is returned.
func (s *Stack) CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// If a NIC is specified, use its NIC id.
	if nicID != 0 {
		nic, ok := s.nics[nicID]
		if !ok {
			return 0
		}
		// In IPv4, linux only checks the interface. If it matches, then it does
		// not bother with the address.
		// https://github.com/torvalds/linux/blob/15205c2829ca2cbb5ece5ceaafe1171a8470e62b/net/ipv4/igmp.c#L1829-L1837
		if protocol == header.IPv4ProtocolNumber {
			return nic.id
		}
		if nic.CheckLocalAddress(protocol, addr) {
			return nic.id
		}
		return 0
	}

	// Go through all the NICs.
	for _, nic := range s.nics {
		if nic.CheckLocalAddress(protocol, addr) {
			return nic.id
		}
	}

	return 0
}

// SetPromiscuousMode enables or disables promiscuous mode in the given NIC.
//
// Returns ErrUnknownNICID if nicID is not registered.
func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) tcpip.Error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	nic, ok := s.nics[nicID]
	if !ok {
		return &tcpip.ErrUnknownNICID{}
	}

	nic.setPromiscuousMode(enable)

	return nil
}

// SetSpoofing enables or disables address spoofing in the given NIC, allowing
// endpoints to bind to any address in the NIC.
//
// Returns ErrUnknownNICID if nicID is not registered.
func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) tcpip.Error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	nic, ok := s.nics[nicID]
	if !ok {
		return &tcpip.ErrUnknownNICID{}
	}

	nic.setSpoofing(enable)

	return nil
}

// LinkResolutionResult is the result of a link address resolution attempt.
type LinkResolutionResult struct {
	// LinkAddress is the link address produced by the resolution attempt.
	LinkAddress tcpip.LinkAddress
	// Err is the error of the resolution attempt.
	// NOTE(review): presumably nil on success — confirm against the resolver.
	Err tcpip.Error
}

// GetLinkAddress finds the link address corresponding to a network address.
//
// Returns ErrNotSupported if the stack is not configured with a link address
// resolver for the specified network protocol.
//
// Returns ErrWouldBlock if the link address is not readily available. No
// notification channel is returned; completion is reported through
// onResolve. Triggers address resolution asynchronously.
1578 // 1579 // onResolve will be called either immediately, if resolution is not required, 1580 // or when address resolution is complete, with the resolved link address and 1581 // whether resolution succeeded. 1582 // 1583 // If specified, the local address must be an address local to the interface 1584 // the neighbor cache belongs to. The local address is the source address of 1585 // a packet prompting NUD/link address resolution. 1586 func (s *Stack) GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, onResolve func(LinkResolutionResult)) tcpip.Error { 1587 s.mu.RLock() 1588 nic, ok := s.nics[nicID] 1589 s.mu.RUnlock() 1590 if !ok { 1591 return &tcpip.ErrUnknownNICID{} 1592 } 1593 1594 return nic.getLinkAddress(addr, localAddr, protocol, onResolve) 1595 } 1596 1597 // Neighbors returns all IP to MAC address associations. 1598 func (s *Stack) Neighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) ([]NeighborEntry, tcpip.Error) { 1599 s.mu.RLock() 1600 nic, ok := s.nics[nicID] 1601 s.mu.RUnlock() 1602 1603 if !ok { 1604 return nil, &tcpip.ErrUnknownNICID{} 1605 } 1606 1607 return nic.neighbors(protocol) 1608 } 1609 1610 // AddStaticNeighbor statically associates an IP address to a MAC address. 1611 func (s *Stack) AddStaticNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress) tcpip.Error { 1612 s.mu.RLock() 1613 nic, ok := s.nics[nicID] 1614 s.mu.RUnlock() 1615 1616 if !ok { 1617 return &tcpip.ErrUnknownNICID{} 1618 } 1619 1620 return nic.addStaticNeighbor(addr, protocol, linkAddr) 1621 } 1622 1623 // RemoveNeighbor removes an IP to MAC address association previously created 1624 // either automatically or by AddStaticNeighbor. Returns ErrBadAddress if there 1625 // is no association with the provided address. 
1626 func (s *Stack) RemoveNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error { 1627 s.mu.RLock() 1628 nic, ok := s.nics[nicID] 1629 s.mu.RUnlock() 1630 1631 if !ok { 1632 return &tcpip.ErrUnknownNICID{} 1633 } 1634 1635 return nic.removeNeighbor(protocol, addr) 1636 } 1637 1638 // ClearNeighbors removes all IP to MAC address associations. 1639 func (s *Stack) ClearNeighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) tcpip.Error { 1640 s.mu.RLock() 1641 nic, ok := s.nics[nicID] 1642 s.mu.RUnlock() 1643 1644 if !ok { 1645 return &tcpip.ErrUnknownNICID{} 1646 } 1647 1648 return nic.clearNeighbors(protocol) 1649 } 1650 1651 // RegisterTransportEndpoint registers the given endpoint with the stack 1652 // transport dispatcher. Received packets that match the provided id will be 1653 // delivered to the given endpoint; specifying a nic is optional, but 1654 // nic-specific IDs have precedence over global ones. 1655 func (s *Stack) RegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { 1656 return s.demux.registerEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) 1657 } 1658 1659 // CheckRegisterTransportEndpoint checks if an endpoint can be registered with 1660 // the stack transport dispatcher. 1661 func (s *Stack) CheckRegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { 1662 return s.demux.checkEndpoint(netProtos, protocol, id, flags, bindToDevice) 1663 } 1664 1665 // UnregisterTransportEndpoint removes the endpoint with the given id from the 1666 // stack transport dispatcher. 
1667 func (s *Stack) UnregisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) { 1668 s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) 1669 } 1670 1671 // StartTransportEndpointCleanup removes the endpoint with the given id from 1672 // the stack transport dispatcher. It also transitions it to the cleanup stage. 1673 func (s *Stack) StartTransportEndpointCleanup(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) { 1674 s.cleanupEndpointsMu.Lock() 1675 s.cleanupEndpoints[ep] = struct{}{} 1676 s.cleanupEndpointsMu.Unlock() 1677 1678 s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) 1679 } 1680 1681 // CompleteTransportEndpointCleanup removes the endpoint from the cleanup 1682 // stage. 1683 func (s *Stack) CompleteTransportEndpointCleanup(ep TransportEndpoint) { 1684 s.cleanupEndpointsMu.Lock() 1685 delete(s.cleanupEndpoints, ep) 1686 s.cleanupEndpointsMu.Unlock() 1687 } 1688 1689 // FindTransportEndpoint finds an endpoint that most closely matches the provided 1690 // id. If no endpoint is found it returns nil. 1691 func (s *Stack) FindTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, nicID tcpip.NICID) TransportEndpoint { 1692 return s.demux.findTransportEndpoint(netProto, transProto, id, nicID) 1693 } 1694 1695 // RegisterRawTransportEndpoint registers the given endpoint with the stack 1696 // transport dispatcher. Received packets that match the provided transport 1697 // protocol will be delivered to the given endpoint. 
1698 func (s *Stack) RegisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) tcpip.Error { 1699 return s.demux.registerRawEndpoint(netProto, transProto, ep) 1700 } 1701 1702 // UnregisterRawTransportEndpoint removes the endpoint for the transport 1703 // protocol from the stack transport dispatcher. 1704 func (s *Stack) UnregisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) { 1705 s.demux.unregisterRawEndpoint(netProto, transProto, ep) 1706 } 1707 1708 // RegisterRestoredEndpoint records e as an endpoint that has been restored on 1709 // this stack. 1710 func (s *Stack) RegisterRestoredEndpoint(e RestoredEndpoint) { 1711 s.mu.Lock() 1712 defer s.mu.Unlock() 1713 1714 s.restoredEndpoints = append(s.restoredEndpoints, e) 1715 } 1716 1717 // RegisterResumableEndpoint records e as an endpoint that has to be resumed. 1718 func (s *Stack) RegisterResumableEndpoint(e ResumableEndpoint) { 1719 s.mu.Lock() 1720 defer s.mu.Unlock() 1721 1722 s.resumableEndpoints = append(s.resumableEndpoints, e) 1723 } 1724 1725 // RegisteredEndpoints returns all endpoints which are currently registered. 1726 func (s *Stack) RegisteredEndpoints() []TransportEndpoint { 1727 s.mu.Lock() 1728 defer s.mu.Unlock() 1729 1730 var es []TransportEndpoint 1731 for _, e := range s.demux.protocol { 1732 es = append(es, e.transportEndpoints()...) 1733 } 1734 return es 1735 } 1736 1737 // CleanupEndpoints returns endpoints currently in the cleanup state. 1738 func (s *Stack) CleanupEndpoints() []TransportEndpoint { 1739 s.cleanupEndpointsMu.Lock() 1740 defer s.cleanupEndpointsMu.Unlock() 1741 1742 es := make([]TransportEndpoint, 0, len(s.cleanupEndpoints)) 1743 for e := range s.cleanupEndpoints { 1744 es = append(es, e) 1745 } 1746 return es 1747 } 1748 1749 // RestoreCleanupEndpoints adds endpoints to cleanup tracking. 
This is useful 1750 // for restoring a stack after a save. 1751 func (s *Stack) RestoreCleanupEndpoints(es []TransportEndpoint) { 1752 s.cleanupEndpointsMu.Lock() 1753 defer s.cleanupEndpointsMu.Unlock() 1754 1755 for _, e := range es { 1756 s.cleanupEndpoints[e] = struct{}{} 1757 } 1758 } 1759 1760 // Close closes all currently registered transport endpoints. 1761 // 1762 // Endpoints created or modified during this call may not get closed. 1763 func (s *Stack) Close() { 1764 for _, e := range s.RegisteredEndpoints() { 1765 e.Abort() 1766 } 1767 for _, p := range s.transportProtocols { 1768 p.proto.Close() 1769 } 1770 for _, p := range s.networkProtocols { 1771 p.Close() 1772 } 1773 } 1774 1775 // Wait waits for all transport and link endpoints to halt their worker 1776 // goroutines. 1777 // 1778 // Endpoints created or modified during this call may not get waited on. 1779 // 1780 // Note that link endpoints must be stopped via an implementation specific 1781 // mechanism. 1782 func (s *Stack) Wait() { 1783 for _, e := range s.RegisteredEndpoints() { 1784 e.Wait() 1785 } 1786 for _, e := range s.CleanupEndpoints() { 1787 e.Wait() 1788 } 1789 for _, p := range s.transportProtocols { 1790 p.proto.Wait() 1791 } 1792 for _, p := range s.networkProtocols { 1793 p.Wait() 1794 } 1795 1796 s.mu.Lock() 1797 defer s.mu.Unlock() 1798 1799 for id, n := range s.nics { 1800 // Remove NIC to ensure that qDisc goroutines are correctly 1801 // terminated on stack teardown. 1802 s.removeNICLocked(id) 1803 n.NetworkLinkEndpoint.Wait() 1804 } 1805 } 1806 1807 // Destroy destroys the stack with all endpoints. 1808 func (s *Stack) Destroy() { 1809 s.Close() 1810 s.Wait() 1811 } 1812 1813 // Pause pauses any protocol level background workers. 1814 func (s *Stack) Pause() { 1815 for _, p := range s.transportProtocols { 1816 p.proto.Pause() 1817 } 1818 } 1819 1820 // Restore restarts the stack after a restore. This must be called after the 1821 // entire system has been restored. 
1822 func (s *Stack) Restore() { 1823 // RestoredEndpoint.Restore() may call other methods on s, so we can't hold 1824 // s.mu while restoring the endpoints. 1825 s.mu.Lock() 1826 eps := s.restoredEndpoints 1827 s.restoredEndpoints = nil 1828 s.mu.Unlock() 1829 for _, e := range eps { 1830 e.Restore(s) 1831 } 1832 // Now resume any protocol level background workers. 1833 for _, p := range s.transportProtocols { 1834 p.proto.Resume() 1835 } 1836 } 1837 1838 // Resume resumes the stack after a save. 1839 func (s *Stack) Resume() { 1840 s.mu.Lock() 1841 eps := s.resumableEndpoints 1842 s.resumableEndpoints = nil 1843 s.mu.Unlock() 1844 for _, e := range eps { 1845 e.Resume() 1846 } 1847 // Now resume any protocol level background workers. 1848 for _, p := range s.transportProtocols { 1849 p.proto.Resume() 1850 } 1851 } 1852 1853 // RegisterPacketEndpoint registers ep with the stack, causing it to receive 1854 // all traffic of the specified netProto on the given NIC. If nicID is 0, it 1855 // receives traffic from every NIC. 1856 func (s *Stack) RegisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) tcpip.Error { 1857 s.mu.Lock() 1858 defer s.mu.Unlock() 1859 1860 // If no NIC is specified, capture on all devices. 1861 if nicID == 0 { 1862 // Register with each NIC. 1863 for _, nic := range s.nics { 1864 nic.registerPacketEndpoint(netProto, ep) 1865 } 1866 return nil 1867 } 1868 1869 // Capture on a specific device. 1870 nic, ok := s.nics[nicID] 1871 if !ok { 1872 return &tcpip.ErrUnknownNICID{} 1873 } 1874 nic.registerPacketEndpoint(netProto, ep) 1875 1876 return nil 1877 } 1878 1879 // UnregisterPacketEndpoint unregisters ep for packets of the specified 1880 // netProto from the specified NIC. If nicID is 0, ep is unregistered from all 1881 // NICs. 
1882 func (s *Stack) UnregisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) { 1883 s.mu.Lock() 1884 defer s.mu.Unlock() 1885 s.unregisterPacketEndpointLocked(nicID, netProto, ep) 1886 } 1887 1888 // +checklocks:s.mu 1889 func (s *Stack) unregisterPacketEndpointLocked(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) { 1890 // If no NIC is specified, unregister on all devices. 1891 if nicID == 0 { 1892 // Unregister with each NIC. 1893 for _, nic := range s.nics { 1894 nic.unregisterPacketEndpoint(netProto, ep) 1895 } 1896 return 1897 } 1898 1899 // Unregister in a single device. 1900 nic, ok := s.nics[nicID] 1901 if !ok { 1902 return 1903 } 1904 nic.unregisterPacketEndpoint(netProto, ep) 1905 } 1906 1907 // WritePacketToRemote writes a payload on the specified NIC using the provided 1908 // network protocol and remote link address. 1909 func (s *Stack) WritePacketToRemote(nicID tcpip.NICID, remote tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, payload buffer.Buffer) tcpip.Error { 1910 s.mu.Lock() 1911 nic, ok := s.nics[nicID] 1912 s.mu.Unlock() 1913 if !ok { 1914 return &tcpip.ErrUnknownDevice{} 1915 } 1916 pkt := NewPacketBuffer(PacketBufferOptions{ 1917 ReserveHeaderBytes: int(nic.MaxHeaderLength()), 1918 Payload: payload, 1919 }) 1920 defer pkt.DecRef() 1921 pkt.NetworkProtocolNumber = netProto 1922 return nic.WritePacketToRemote(remote, pkt) 1923 } 1924 1925 // WriteRawPacket writes data directly to the specified NIC without adding any 1926 // headers. 
1927 func (s *Stack) WriteRawPacket(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber, payload buffer.Buffer) tcpip.Error { 1928 s.mu.RLock() 1929 nic, ok := s.nics[nicID] 1930 s.mu.RUnlock() 1931 if !ok { 1932 return &tcpip.ErrUnknownNICID{} 1933 } 1934 1935 pkt := NewPacketBuffer(PacketBufferOptions{ 1936 Payload: payload, 1937 }) 1938 defer pkt.DecRef() 1939 pkt.NetworkProtocolNumber = proto 1940 return nic.writeRawPacketWithLinkHeaderInPayload(pkt) 1941 } 1942 1943 // NetworkProtocolInstance returns the protocol instance in the stack for the 1944 // specified network protocol. This method is public for protocol implementers 1945 // and tests to use. 1946 func (s *Stack) NetworkProtocolInstance(num tcpip.NetworkProtocolNumber) NetworkProtocol { 1947 if p, ok := s.networkProtocols[num]; ok { 1948 return p 1949 } 1950 return nil 1951 } 1952 1953 // TransportProtocolInstance returns the protocol instance in the stack for the 1954 // specified transport protocol. This method is public for protocol implementers 1955 // and tests to use. 1956 func (s *Stack) TransportProtocolInstance(num tcpip.TransportProtocolNumber) TransportProtocol { 1957 if pState, ok := s.transportProtocols[num]; ok { 1958 return pState.proto 1959 } 1960 return nil 1961 } 1962 1963 // AddTCPProbe installs a probe function that will be invoked on every segment 1964 // received by a given TCP endpoint. The probe function is passed a copy of the 1965 // TCP endpoint state before and after processing of the segment. 1966 // 1967 // NOTE: TCPProbe is added only to endpoints created after this call. Endpoints 1968 // created prior to this call will not call the probe function. 1969 // 1970 // Further, installing two different probes back to back can result in some 1971 // endpoints calling the first one and some the second one. There is no 1972 // guarantee provided on which probe will be invoked. Ideally this should only 1973 // be called once per stack. 
1974 func (s *Stack) AddTCPProbe(probe TCPProbeFunc) { 1975 s.tcpProbeFunc.Store(probe) 1976 } 1977 1978 // GetTCPProbe returns the TCPProbeFunc if installed with AddTCPProbe, nil 1979 // otherwise. 1980 func (s *Stack) GetTCPProbe() TCPProbeFunc { 1981 p := s.tcpProbeFunc.Load() 1982 if p == nil { 1983 return nil 1984 } 1985 return p.(TCPProbeFunc) 1986 } 1987 1988 // RemoveTCPProbe removes an installed TCP probe. 1989 // 1990 // NOTE: This only ensures that endpoints created after this call do not 1991 // have a probe attached. Endpoints already created will continue to invoke 1992 // TCP probe. 1993 func (s *Stack) RemoveTCPProbe() { 1994 // This must be TCPProbeFunc(nil) because atomic.Value.Store(nil) panics. 1995 s.tcpProbeFunc.Store(TCPProbeFunc(nil)) 1996 } 1997 1998 // JoinGroup joins the given multicast group on the given NIC. 1999 func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error { 2000 s.mu.RLock() 2001 defer s.mu.RUnlock() 2002 2003 if nic, ok := s.nics[nicID]; ok { 2004 return nic.joinGroup(protocol, multicastAddr) 2005 } 2006 return &tcpip.ErrUnknownNICID{} 2007 } 2008 2009 // LeaveGroup leaves the given multicast group on the given NIC. 2010 func (s *Stack) LeaveGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error { 2011 s.mu.RLock() 2012 defer s.mu.RUnlock() 2013 2014 if nic, ok := s.nics[nicID]; ok { 2015 return nic.leaveGroup(protocol, multicastAddr) 2016 } 2017 return &tcpip.ErrUnknownNICID{} 2018 } 2019 2020 // IsInGroup returns true if the NIC with ID nicID has joined the multicast 2021 // group multicastAddr. 
2022 func (s *Stack) IsInGroup(nicID tcpip.NICID, multicastAddr tcpip.Address) (bool, tcpip.Error) { 2023 s.mu.RLock() 2024 defer s.mu.RUnlock() 2025 2026 if nic, ok := s.nics[nicID]; ok { 2027 return nic.isInGroup(multicastAddr), nil 2028 } 2029 return false, &tcpip.ErrUnknownNICID{} 2030 } 2031 2032 // IPTables returns the stack's iptables. 2033 func (s *Stack) IPTables() *IPTables { 2034 return s.tables 2035 } 2036 2037 // ICMPLimit returns the maximum number of ICMP messages that can be sent 2038 // in one second. 2039 func (s *Stack) ICMPLimit() rate.Limit { 2040 return s.icmpRateLimiter.Limit() 2041 } 2042 2043 // SetICMPLimit sets the maximum number of ICMP messages that be sent 2044 // in one second. 2045 func (s *Stack) SetICMPLimit(newLimit rate.Limit) { 2046 s.icmpRateLimiter.SetLimit(newLimit) 2047 } 2048 2049 // ICMPBurst returns the maximum number of ICMP messages that can be sent 2050 // in a single burst. 2051 func (s *Stack) ICMPBurst() int { 2052 return s.icmpRateLimiter.Burst() 2053 } 2054 2055 // SetICMPBurst sets the maximum number of ICMP messages that can be sent 2056 // in a single burst. 2057 func (s *Stack) SetICMPBurst(burst int) { 2058 s.icmpRateLimiter.SetBurst(burst) 2059 } 2060 2061 // AllowICMPMessage returns true if we the rate limiter allows at least one 2062 // ICMP message to be sent at this instant. 2063 func (s *Stack) AllowICMPMessage() bool { 2064 return s.icmpRateLimiter.Allow() 2065 } 2066 2067 // GetNetworkEndpoint returns the NetworkEndpoint with the specified protocol 2068 // number installed on the specified NIC. 2069 func (s *Stack) GetNetworkEndpoint(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NetworkEndpoint, tcpip.Error) { 2070 s.mu.Lock() 2071 defer s.mu.Unlock() 2072 2073 nic, ok := s.nics[nicID] 2074 if !ok { 2075 return nil, &tcpip.ErrUnknownNICID{} 2076 } 2077 2078 return nic.getNetworkEndpoint(proto), nil 2079 } 2080 2081 // NUDConfigurations gets the per-interface NUD configurations. 
2082 func (s *Stack) NUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NUDConfigurations, tcpip.Error) { 2083 s.mu.RLock() 2084 nic, ok := s.nics[id] 2085 s.mu.RUnlock() 2086 2087 if !ok { 2088 return NUDConfigurations{}, &tcpip.ErrUnknownNICID{} 2089 } 2090 2091 return nic.nudConfigs(proto) 2092 } 2093 2094 // SetNUDConfigurations sets the per-interface NUD configurations. 2095 // 2096 // Note, if c contains invalid NUD configuration values, it will be fixed to 2097 // use default values for the erroneous values. 2098 func (s *Stack) SetNUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber, c NUDConfigurations) tcpip.Error { 2099 s.mu.RLock() 2100 nic, ok := s.nics[id] 2101 s.mu.RUnlock() 2102 2103 if !ok { 2104 return &tcpip.ErrUnknownNICID{} 2105 } 2106 2107 return nic.setNUDConfigs(proto, c) 2108 } 2109 2110 // Seed returns a 32 bit value that can be used as a seed value. 2111 // 2112 // NOTE: The seed is generated once during stack initialization only. 2113 func (s *Stack) Seed() uint32 { 2114 return s.seed 2115 } 2116 2117 // InsecureRNG returns a reference to a pseudo random generator that can be used 2118 // to generate random numbers as required. It is not cryptographically secure 2119 // and should not be used for security sensitive work. 2120 func (s *Stack) InsecureRNG() *rand.Rand { 2121 return s.insecureRNG 2122 } 2123 2124 // SecureRNG returns the stack's cryptographically secure random number 2125 // generator. 2126 func (s *Stack) SecureRNG() cryptorand.RNG { 2127 return s.secureRNG 2128 } 2129 2130 // FindNICNameFromID returns the name of the NIC for the given NICID. 2131 func (s *Stack) FindNICNameFromID(id tcpip.NICID) string { 2132 s.mu.RLock() 2133 defer s.mu.RUnlock() 2134 2135 nic, ok := s.nics[id] 2136 if !ok { 2137 return "" 2138 } 2139 2140 return nic.Name() 2141 } 2142 2143 // ParseResult indicates the result of a parsing attempt. 
2144 type ParseResult int 2145 2146 const ( 2147 // ParsedOK indicates that a packet was successfully parsed. 2148 ParsedOK ParseResult = iota 2149 2150 // UnknownTransportProtocol indicates that the transport protocol is unknown. 2151 UnknownTransportProtocol 2152 2153 // TransportLayerParseError indicates that the transport packet was not 2154 // successfully parsed. 2155 TransportLayerParseError 2156 ) 2157 2158 // ParsePacketBufferTransport parses the provided packet buffer's transport 2159 // header. 2160 func (s *Stack) ParsePacketBufferTransport(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) ParseResult { 2161 pkt.TransportProtocolNumber = protocol 2162 // Parse the transport header if present. 2163 state, ok := s.transportProtocols[protocol] 2164 if !ok { 2165 return UnknownTransportProtocol 2166 } 2167 2168 if !state.proto.Parse(pkt) { 2169 return TransportLayerParseError 2170 } 2171 2172 return ParsedOK 2173 } 2174 2175 // networkProtocolNumbers returns the network protocol numbers the stack is 2176 // configured with. 2177 func (s *Stack) networkProtocolNumbers() []tcpip.NetworkProtocolNumber { 2178 protos := make([]tcpip.NetworkProtocolNumber, 0, len(s.networkProtocols)) 2179 for p := range s.networkProtocols { 2180 protos = append(protos, p) 2181 } 2182 return protos 2183 } 2184 2185 func isSubnetBroadcastOnNIC(nic *nic, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool { 2186 addressEndpoint := nic.getAddressOrCreateTempInner(protocol, addr, false /* createTemp */, NeverPrimaryEndpoint) 2187 if addressEndpoint == nil { 2188 return false 2189 } 2190 2191 subnet := addressEndpoint.Subnet() 2192 addressEndpoint.DecRef() 2193 return subnet.IsBroadcast(addr) 2194 } 2195 2196 // IsSubnetBroadcast returns true if the provided address is a subnet-local 2197 // broadcast address on the specified NIC and protocol. 2198 // 2199 // Returns false if the NIC is unknown or if the protocol is unknown or does 2200 // not support addressing. 
2201 // 2202 // If the NIC is not specified, the stack will check all NICs. 2203 func (s *Stack) IsSubnetBroadcast(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool { 2204 s.mu.RLock() 2205 defer s.mu.RUnlock() 2206 2207 if nicID != 0 { 2208 nic, ok := s.nics[nicID] 2209 if !ok { 2210 return false 2211 } 2212 2213 return isSubnetBroadcastOnNIC(nic, protocol, addr) 2214 } 2215 2216 for _, nic := range s.nics { 2217 if isSubnetBroadcastOnNIC(nic, protocol, addr) { 2218 return true 2219 } 2220 } 2221 2222 return false 2223 } 2224 2225 // PacketEndpointWriteSupported returns true iff packet endpoints support write 2226 // operations. 2227 func (s *Stack) PacketEndpointWriteSupported() bool { 2228 return s.packetEndpointWriteSupported 2229 }