// Source listing: github.com/sagernet/gvisor@v0.0.0-20240428053021-e691de28565f/pkg/tcpip/stack/stack.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package stack provides the glue between networking protocols and the
// consumers of the networking stack.
//
// For consumers, the only function of interest is New(), everything else is
// provided by the tcpip/public package.
package stack

import (
	"encoding/binary"
	"fmt"
	"github.com/sagernet/sing/common"
	"io"
	"math/rand"
	"sync/atomic"
	"time"

	"golang.org/x/time/rate"
	"github.com/sagernet/gvisor/pkg/atomicbitops"
	"github.com/sagernet/gvisor/pkg/buffer"
	"github.com/sagernet/gvisor/pkg/log"
	cryptorand "github.com/sagernet/gvisor/pkg/rand"
	"github.com/sagernet/gvisor/pkg/tcpip"
	"github.com/sagernet/gvisor/pkg/tcpip/header"
	"github.com/sagernet/gvisor/pkg/tcpip/ports"
	"github.com/sagernet/gvisor/pkg/waiter"
)

const (
	// DefaultTOS is the default type of service value for network endpoints.
	DefaultTOS = 0
)

// transportProtocolState pairs a registered transport protocol with the
// optional per-stack default handler installed through
// SetTransportProtocolHandler.
type transportProtocolState struct {
	proto          TransportProtocol
	defaultHandler func(id TransportEndpointID, pkt *PacketBuffer) bool
}

// RestoredEndpoint is an endpoint that needs to be restored.
type RestoredEndpoint interface {
	// Restore restores an endpoint. This can be used to restart background
	// workers such as protocol goroutines. This must be called after all
	// indirect dependencies of the endpoint have been restored, which
	// generally implies at the end of the restore process.
	Restore(*Stack)
}

// ResumableEndpoint is an endpoint that needs to be resumed after save.
type ResumableEndpoint interface {
	// Resume resumes an endpoint.
	Resume()
}

// uniqueIDGenerator is a default unique ID generator.
type uniqueIDGenerator atomicbitops.Uint64

// UniqueID adds 1 to the underlying atomicbitops.Uint64 counter and returns
// the value reported by Add.
func (u *uniqueIDGenerator) UniqueID() uint64 {
	return ((*atomicbitops.Uint64)(u)).Add(1)
}

// netRawMissingLogger is used by NewRawEndpoint to report attempts to create
// raw endpoints when no raw factory is configured.
// NOTE(review): presumably limits output to one message per minute; confirm
// against log.BasicRateLimitedLogger.
var netRawMissingLogger = log.BasicRateLimitedLogger(time.Minute)

// Stack is a networking stack, with all supported protocols, NICs, and route
// table.
//
// LOCK ORDERING: mu > routeMu.
type Stack struct {
	// transportProtocols and networkProtocols hold the protocols instantiated
	// by New from Options, keyed by protocol number.
	transportProtocols map[tcpip.TransportProtocolNumber]*transportProtocolState
	networkProtocols   map[tcpip.NetworkProtocolNumber]NetworkProtocol

	// rawFactory creates raw endpoints. If nil, raw endpoints are
	// disabled. It is set during Stack creation and is immutable.
	rawFactory                   RawFactory
	packetEndpointWriteSupported bool

	demux *transportDemuxer

	stats tcpip.Stats

	// routeMu protects annotated fields below.
	routeMu routeStackRWMutex

	// +checklocks:routeMu
	routeTable []tcpip.Route

	mu stackRWMutex
	// +checklocks:mu
	nics                     map[tcpip.NICID]*nic
	defaultForwardingEnabled map[tcpip.NetworkProtocolNumber]struct{}

	// cleanupEndpointsMu protects cleanupEndpoints.
	cleanupEndpointsMu cleanupEndpointsMutex
	// +checklocks:cleanupEndpointsMu
	cleanupEndpoints map[TransportEndpoint]struct{}

	*ports.PortManager

	// If not nil, then any new endpoints will have this probe function
	// invoked every time they receive a TCP segment.
	tcpProbeFunc atomic.Value // TCPProbeFunc

	// clock is used to generate user-visible times.
	clock tcpip.Clock

	// handleLocal allows non-loopback interfaces to loop packets.
	handleLocal bool

	// tables are the iptables packet filtering and manipulation rules.
	// TODO(gvisor.dev/issue/4595): S/R this field.
	tables *IPTables

	// restoredEndpoints is a list of endpoints that need to be restored if the
	// stack is being restored.
	restoredEndpoints []RestoredEndpoint

	// resumableEndpoints is a list of endpoints that need to be resumed
	// after save.
	resumableEndpoints []ResumableEndpoint

	// icmpRateLimiter is a global rate limiter for all ICMP messages generated
	// by the stack.
	icmpRateLimiter *ICMPRateLimiter

	// seed is a one-time random value initialized at stack startup.
	//
	// TODO(gvisor.dev/issue/940): S/R this field.
	seed uint32

	// nudConfigs is the default NUD configurations used by interfaces.
	nudConfigs NUDConfigurations

	// nudDisp is the NUD event dispatcher that is used to send the netstack
	// integrator NUD related events.
	nudDisp NUDDispatcher

	// uniqueIDGenerator is a generator of unique identifiers.
	uniqueIDGenerator UniqueID

	// insecureRNG is an injectable pseudo random generator that can be
	// used when a random number is required. It must not be used in
	// security-sensitive contexts.
	insecureRNG *rand.Rand

	// secureRNG is a cryptographically secure random number generator.
	secureRNG cryptorand.RNG

	// sendBufferSize holds the min/default/max send buffer sizes for
	// endpoints other than TCP.
	sendBufferSize tcpip.SendBufferSizeOption

	// receiveBufferSize holds the min/default/max receive buffer sizes for
	// endpoints other than TCP.
	receiveBufferSize tcpip.ReceiveBufferSizeOption

	// tcpInvalidRateLimit is the maximal rate for sending duplicate
	// acknowledgements in response to incoming TCP packets that are for an existing
	// connection but that are invalid due to any of the following reasons:
	//
	//   a) out-of-window sequence number.
	//   b) out-of-window acknowledgement number.
	//   c) PAWS check failure (when implemented).
	//
	// This is required to prevent potential ACK loops.
	// Setting this to 0 will disable all rate limiting.
	tcpInvalidRateLimit time.Duration

	// tsOffsetSecret is the secret key for generating timestamp offsets
	// initialized at stack startup.
	tsOffsetSecret uint32
}

// UniqueID is an abstract generator of unique identifiers.
type UniqueID interface {
	UniqueID() uint64
}

// NetworkProtocolFactory instantiates a network protocol.
//
// NetworkProtocolFactory must not attempt to modify the stack, it may only
// query the stack.
type NetworkProtocolFactory func(*Stack) NetworkProtocol

// TransportProtocolFactory instantiates a transport protocol.
//
// TransportProtocolFactory must not attempt to modify the stack, it may only
// query the stack.
type TransportProtocolFactory func(*Stack) TransportProtocol

// Options contains optional Stack configuration.
type Options struct {
	// NetworkProtocols lists the network protocols to enable.
	NetworkProtocols []NetworkProtocolFactory

	// TransportProtocols lists the transport protocols to enable.
	TransportProtocols []TransportProtocolFactory

	// Clock is an optional clock used for timekeeping.
	//
	// If Clock is nil, tcpip.NewStdClock() will be used.
	Clock tcpip.Clock

	// Stats are optional statistic counters.
	Stats tcpip.Stats

	// HandleLocal indicates whether packets destined to their source
	// should be handled by the stack internally (true) or outside the
	// stack (false).
	HandleLocal bool

	// UniqueID is an optional generator of unique identifiers.
	UniqueID UniqueID

	// NUDConfigs is the default NUD configurations used by interfaces.
	NUDConfigs NUDConfigurations

	// NUDDisp is the NUD event dispatcher that an integrator can provide to
	// receive NUD related events.
	NUDDisp NUDDispatcher

	// RawFactory produces raw endpoints. Raw endpoints are enabled only if
	// this is non-nil.
	RawFactory RawFactory

	// AllowPacketEndpointWrite determines if packet endpoints support write
	// operations.
	AllowPacketEndpointWrite bool

	// RandSource is an optional source to use to generate random
	// numbers. If omitted it defaults to a Source seeded by the data
	// returned by the stack secure RNG.
	//
	// RandSource must be thread-safe.
	RandSource rand.Source

	// IPTables are the initial iptables rules. If nil, DefaultIPTables will be
	// used to construct the initial iptables rules.
	IPTables *IPTables

	// DefaultIPTables is an optional iptables rules constructor that is called
	// if IPTables is nil. If both fields are nil, iptables will allow all
	// traffic (New falls back to DefaultTables in that case).
	DefaultIPTables func(clock tcpip.Clock, rand *rand.Rand) *IPTables

	// SecureRNG is a cryptographically secure random number generator.
	// If nil, New uses cryptorand.Reader.
	SecureRNG io.Reader
}

// TransportEndpointInfo holds useful information about a transport endpoint
// which can be queried by monitoring tools.
//
// +stateify savable
type TransportEndpointInfo struct {
	// The following fields are initialized at creation time and are
	// immutable.

	NetProto   tcpip.NetworkProtocolNumber
	TransProto tcpip.TransportProtocolNumber

	// The following fields are protected by endpoint mu.

	ID TransportEndpointID
	// BindNICID and BindAddr are set via calls to Bind(). They are used to
	// reject attempts to send data or connect via a different NIC or
	// address.
	BindNICID tcpip.NICID
	BindAddr  tcpip.Address
	// RegisterNICID is the default NICID registered as a side-effect of
	// connect or datagram write.
	RegisterNICID tcpip.NICID
}

// AddrNetProtoLocked unwraps the specified address if it is a V4-mapped V6
// address and returns the network protocol number to be used to communicate
// with the specified address. It returns an error if the passed address is
// incompatible with the receiver.
//
// Precondition: the parent endpoint mu must be held while calling this method.
func (t *TransportEndpointInfo) AddrNetProtoLocked(addr tcpip.FullAddress, v6only bool) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, tcpip.Error) {
	// Start from the endpoint's own protocol and override it based on the
	// width of the supplied address.
	netProto := t.NetProto
	switch addr.Addr.BitLen() {
	case header.IPv4AddressSizeBits:
		netProto = header.IPv4ProtocolNumber
	case header.IPv6AddressSizeBits:
		if header.IsV4MappedAddress(addr.Addr) {
			netProto = header.IPv4ProtocolNumber
			// Keep only the trailing IPv4-sized bytes of the mapped address.
			addr.Addr = tcpip.AddrFrom4Slice(addr.Addr.AsSlice()[header.IPv6AddressSize-header.IPv4AddressSize:])
			// The unwrapped IPv4 "any" address is normalized to the zero
			// Address value.
			if addr.Addr == header.IPv4Any {
				addr.Addr = tcpip.Address{}
			}
		}
	}

	// Reject an address whose width conflicts with the width of the
	// endpoint's local address.
	switch t.ID.LocalAddress.BitLen() {
	case header.IPv4AddressSizeBits:
		if addr.Addr.BitLen() == header.IPv6AddressSizeBits {
			return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{}
		}
	case header.IPv6AddressSizeBits:
		if addr.Addr.BitLen() == header.IPv4AddressSizeBits {
			return tcpip.FullAddress{}, 0, &tcpip.ErrNetworkUnreachable{}
		}
	}

	switch {
	case netProto == t.NetProto:
	case netProto == header.IPv4ProtocolNumber && t.NetProto == header.IPv6ProtocolNumber:
		// An IPv6 endpoint may reach an IPv4 address unless it is v6-only.
		if v6only {
			return tcpip.FullAddress{}, 0, &tcpip.ErrHostUnreachable{}
		}
	default:
		return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{}
	}

	return addr, netProto, nil
}

// IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo
// marker interface.
func (*TransportEndpointInfo) IsEndpointInfo() {}

// New allocates a new networking stack with only the requested networking and
// transport protocols configured with default options.
//
// Note, NDPConfigurations will be fixed before being used by the Stack. That
// is, if an invalid value was provided, it will be reset to the default value.
//
// Protocol options can be changed by calling the
// SetNetworkProtocolOption/SetTransportProtocolOption methods provided by the
// stack. Please refer to individual protocol implementations as to what options
// are supported.
//
// New panics if reading the seed for the default random source from the
// secure RNG fails.
func New(opts Options) *Stack {
	clock := opts.Clock
	if clock == nil {
		clock = tcpip.NewStdClock()
	}

	if opts.UniqueID == nil {
		opts.UniqueID = new(uniqueIDGenerator)
	}

	if opts.SecureRNG == nil {
		opts.SecureRNG = cryptorand.Reader
	}
	secureRNG := cryptorand.RNGFrom(opts.SecureRNG)

	randSrc := opts.RandSource
	if randSrc == nil {
		// Seed the non-cryptographic source with an int64 read from the
		// secure RNG.
		var v int64
		if err := binary.Read(opts.SecureRNG, binary.LittleEndian, &v); err != nil {
			panic(err)
		}
		// Source provided by rand.NewSource is not thread-safe so
		// we wrap it in a simple thread-safe version.
		randSrc = &lockedRandomSource{src: rand.NewSource(v)}
	}
	insecureRNG := rand.New(randSrc)

	if opts.IPTables == nil {
		if opts.DefaultIPTables == nil {
			opts.DefaultIPTables = DefaultTables
		}
		opts.IPTables = opts.DefaultIPTables(clock, insecureRNG)
	}

	opts.NUDConfigs.resetInvalidFields()

	s := &Stack{
		transportProtocols:           make(map[tcpip.TransportProtocolNumber]*transportProtocolState),
		networkProtocols:             make(map[tcpip.NetworkProtocolNumber]NetworkProtocol),
		nics:                         make(map[tcpip.NICID]*nic),
		packetEndpointWriteSupported: opts.AllowPacketEndpointWrite,
		defaultForwardingEnabled:     make(map[tcpip.NetworkProtocolNumber]struct{}),
		cleanupEndpoints:             make(map[TransportEndpoint]struct{}),
		PortManager:                  ports.NewPortManager(),
		clock:                        clock,
		stats:                        opts.Stats.FillIn(),
		handleLocal:                  opts.HandleLocal,
		tables:                       opts.IPTables,
		icmpRateLimiter:              NewICMPRateLimiter(clock),
		seed:                         secureRNG.Uint32(),
		nudConfigs:                   opts.NUDConfigs,
		uniqueIDGenerator:            opts.UniqueID,
		nudDisp:                      opts.NUDDisp,
		insecureRNG:                  insecureRNG,
		secureRNG:                    secureRNG,
		sendBufferSize: tcpip.SendBufferSizeOption{
			Min:     MinBufferSize,
			Default: DefaultBufferSize,
			Max:     DefaultMaxBufferSize,
		},
		receiveBufferSize: tcpip.ReceiveBufferSizeOption{
			Min:     MinBufferSize,
			Default: DefaultBufferSize,
			Max:     DefaultMaxBufferSize,
		},
		tcpInvalidRateLimit: defaultTCPInvalidRateLimit,
		tsOffsetSecret:      secureRNG.Uint32(),
	}

	// Add specified network protocols.
	for _, netProtoFactory := range opts.NetworkProtocols {
		netProto := netProtoFactory(s)
		s.networkProtocols[netProto.Number()] = netProto
	}

	// Add specified transport protocols.
	for _, transProtoFactory := range opts.TransportProtocols {
		transProto := transProtoFactory(s)
		s.transportProtocols[transProto.Number()] = &transportProtocolState{
			proto: transProto,
		}
	}

	// Add the factory for raw endpoints, if present.
	s.rawFactory = opts.RawFactory

	// Create the global transport demuxer.
	s.demux = newTransportDemuxer(s)

	return s
}

// UniqueID returns a unique identifier.
func (s *Stack) UniqueID() uint64 {
	return s.uniqueIDGenerator.UniqueID()
}

// SetNetworkProtocolOption allows configuring individual protocol level
// options. This method returns an error if the protocol is not supported or
// option is not supported by the protocol implementation or the provided value
// is incorrect.
func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.SettableNetworkProtocolOption) tcpip.Error {
	netProto, ok := s.networkProtocols[network]
	if !ok {
		return &tcpip.ErrUnknownProtocol{}
	}
	return netProto.SetOption(option)
}

// NetworkProtocolOption allows retrieving individual protocol level option
// values. This method returns an error if the protocol is not supported or
// option is not supported by the protocol implementation. E.g.:
//
//	var v ipv4.MyOption
//	err := s.NetworkProtocolOption(tcpip.IPv4ProtocolNumber, &v)
//	if err != nil {
//		...
//	}
func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.GettableNetworkProtocolOption) tcpip.Error {
	netProto, ok := s.networkProtocols[network]
	if !ok {
		return &tcpip.ErrUnknownProtocol{}
	}
	return netProto.Option(option)
}

// SetTransportProtocolOption allows configuring individual protocol level
// options. This method returns an error if the protocol is not supported or
// option is not supported by the protocol implementation or the provided value
// is incorrect.
func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.SettableTransportProtocolOption) tcpip.Error {
	transProtoState, ok := s.transportProtocols[transport]
	if !ok {
		return &tcpip.ErrUnknownProtocol{}
	}
	return transProtoState.proto.SetOption(option)
}

// TransportProtocolOption allows retrieving individual protocol level option
// values. This method returns an error if the protocol is not supported or
// option is not supported by the protocol implementation.
//
//	var v tcp.SACKEnabled
//	if err := s.TransportProtocolOption(tcpip.TCPProtocolNumber, &v); err != nil {
//		...
//	}
func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.GettableTransportProtocolOption) tcpip.Error {
	transProtoState, ok := s.transportProtocols[transport]
	if !ok {
		return &tcpip.ErrUnknownProtocol{}
	}
	return transProtoState.proto.Option(option)
}

// SendBufSizeProto is a protocol that can return its send buffer size.
type SendBufSizeProto interface {
	SendBufferSize() tcpip.TCPSendBufferSizeRangeOption
}

// TCPSendBufferLimits returns the TCP send buffer size limit.
//
// It panics if TCP is not registered on the stack (the map lookup yields a
// nil state) or if the registered protocol does not implement
// SendBufSizeProto (unchecked type assertion).
func (s *Stack) TCPSendBufferLimits() tcpip.TCPSendBufferSizeRangeOption {
	return s.transportProtocols[header.TCPProtocolNumber].proto.(SendBufSizeProto).SendBufferSize()
}

// SetTransportProtocolHandler sets the per-stack default handler for the given
// protocol.
//
// It must be called only during initialization of the stack. Changing it as the
// stack is operating is not supported.
514 func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(TransportEndpointID, *PacketBuffer) bool) { 515 state := s.transportProtocols[p] 516 if state != nil { 517 state.defaultHandler = h 518 } 519 } 520 521 // Clock returns the Stack's clock for retrieving the current time and 522 // scheduling work. 523 func (s *Stack) Clock() tcpip.Clock { 524 return s.clock 525 } 526 527 // Stats returns a mutable copy of the current stats. 528 // 529 // This is not generally exported via the public interface, but is available 530 // internally. 531 func (s *Stack) Stats() tcpip.Stats { 532 return s.stats 533 } 534 535 // SetNICForwarding enables or disables packet forwarding on the specified NIC 536 // for the passed protocol. 537 // 538 // Returns the previous configuration on the NIC. 539 func (s *Stack) SetNICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) { 540 s.mu.RLock() 541 defer s.mu.RUnlock() 542 543 nic, ok := s.nics[id] 544 if !ok { 545 return false, &tcpip.ErrUnknownNICID{} 546 } 547 548 return nic.setForwarding(protocol, enable) 549 } 550 551 // NICForwarding returns the forwarding configuration for the specified NIC. 552 func (s *Stack) NICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) { 553 s.mu.RLock() 554 defer s.mu.RUnlock() 555 556 nic, ok := s.nics[id] 557 if !ok { 558 return false, &tcpip.ErrUnknownNICID{} 559 } 560 561 return nic.forwarding(protocol) 562 } 563 564 // SetForwardingDefaultAndAllNICs sets packet forwarding for all NICs for the 565 // passed protocol and sets the default setting for newly created NICs. 
566 func (s *Stack) SetForwardingDefaultAndAllNICs(protocol tcpip.NetworkProtocolNumber, enable bool) tcpip.Error { 567 s.mu.Lock() 568 defer s.mu.Unlock() 569 570 doneOnce := false 571 for id, nic := range s.nics { 572 if _, err := nic.setForwarding(protocol, enable); err != nil { 573 // Expect forwarding to be settable on all interfaces if it was set on 574 // one. 575 if doneOnce { 576 panic(fmt.Sprintf("nic(id=%d).setForwarding(%d, %t): %s", id, protocol, enable, err)) 577 } 578 579 return err 580 } 581 582 doneOnce = true 583 } 584 585 if enable { 586 s.defaultForwardingEnabled[protocol] = struct{}{} 587 } else { 588 delete(s.defaultForwardingEnabled, protocol) 589 } 590 591 return nil 592 } 593 594 // AddMulticastRoute adds a multicast route to be used for the specified 595 // addresses and protocol. 596 func (s *Stack) AddMulticastRoute(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination, route MulticastRoute) tcpip.Error { 597 netProto, ok := s.networkProtocols[protocol] 598 if !ok { 599 return &tcpip.ErrUnknownProtocol{} 600 } 601 602 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 603 if !ok { 604 return &tcpip.ErrNotSupported{} 605 } 606 607 return forwardingNetProto.AddMulticastRoute(addresses, route) 608 } 609 610 // RemoveMulticastRoute removes a multicast route that matches the specified 611 // addresses and protocol. 
612 func (s *Stack) RemoveMulticastRoute(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination) tcpip.Error { 613 netProto, ok := s.networkProtocols[protocol] 614 if !ok { 615 return &tcpip.ErrUnknownProtocol{} 616 } 617 618 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 619 if !ok { 620 return &tcpip.ErrNotSupported{} 621 } 622 623 return forwardingNetProto.RemoveMulticastRoute(addresses) 624 } 625 626 // MulticastRouteLastUsedTime returns a monotonic timestamp that represents the 627 // last time that the route that matches the provided addresses and protocol 628 // was used or updated. 629 func (s *Stack) MulticastRouteLastUsedTime(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination) (tcpip.MonotonicTime, tcpip.Error) { 630 netProto, ok := s.networkProtocols[protocol] 631 if !ok { 632 return tcpip.MonotonicTime{}, &tcpip.ErrUnknownProtocol{} 633 } 634 635 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 636 if !ok { 637 return tcpip.MonotonicTime{}, &tcpip.ErrNotSupported{} 638 } 639 640 return forwardingNetProto.MulticastRouteLastUsedTime(addresses) 641 } 642 643 // EnableMulticastForwardingForProtocol enables multicast forwarding for the 644 // provided protocol. 645 // 646 // Returns true if forwarding was already enabled on the protocol. 647 // Additionally, returns an error if: 648 // 649 // - The protocol is not found. 650 // - The protocol doesn't support multicast forwarding. 651 // - The multicast forwarding event dispatcher is nil. 652 // 653 // If successful, future multicast forwarding events will be sent to the 654 // provided event dispatcher. 
655 func (s *Stack) EnableMulticastForwardingForProtocol(protocol tcpip.NetworkProtocolNumber, disp MulticastForwardingEventDispatcher) (bool, tcpip.Error) { 656 netProto, ok := s.networkProtocols[protocol] 657 if !ok { 658 return false, &tcpip.ErrUnknownProtocol{} 659 } 660 661 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 662 if !ok { 663 return false, &tcpip.ErrNotSupported{} 664 } 665 666 return forwardingNetProto.EnableMulticastForwarding(disp) 667 } 668 669 // DisableMulticastForwardingForProtocol disables multicast forwarding for the 670 // provided protocol. 671 // 672 // Returns an error if the provided protocol is not found or if it does not 673 // support multicast forwarding. 674 func (s *Stack) DisableMulticastForwardingForProtocol(protocol tcpip.NetworkProtocolNumber) tcpip.Error { 675 netProto, ok := s.networkProtocols[protocol] 676 if !ok { 677 return &tcpip.ErrUnknownProtocol{} 678 } 679 680 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol) 681 if !ok { 682 return &tcpip.ErrNotSupported{} 683 } 684 685 forwardingNetProto.DisableMulticastForwarding() 686 return nil 687 } 688 689 // SetNICMulticastForwarding enables or disables multicast packet forwarding on 690 // the specified NIC for the passed protocol. 691 // 692 // Returns the previous configuration on the NIC. 693 // 694 // TODO(https://gvisor.dev/issue/7338): Implement support for multicast 695 // forwarding. Currently, setting this value is a no-op and is not ready for 696 // use. 697 func (s *Stack) SetNICMulticastForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) { 698 s.mu.RLock() 699 defer s.mu.RUnlock() 700 701 nic, ok := s.nics[id] 702 if !ok { 703 return false, &tcpip.ErrUnknownNICID{} 704 } 705 706 return nic.setMulticastForwarding(protocol, enable) 707 } 708 709 // NICMulticastForwarding returns the multicast forwarding configuration for 710 // the specified NIC. 
711 func (s *Stack) NICMulticastForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) { 712 s.mu.RLock() 713 defer s.mu.RUnlock() 714 715 nic, ok := s.nics[id] 716 if !ok { 717 return false, &tcpip.ErrUnknownNICID{} 718 } 719 720 return nic.multicastForwarding(protocol) 721 } 722 723 // PortRange returns the UDP and TCP inclusive range of ephemeral ports used in 724 // both IPv4 and IPv6. 725 func (s *Stack) PortRange() (uint16, uint16) { 726 return s.PortManager.PortRange() 727 } 728 729 // SetPortRange sets the UDP and TCP IPv4 and IPv6 ephemeral port range 730 // (inclusive). 731 func (s *Stack) SetPortRange(start uint16, end uint16) tcpip.Error { 732 return s.PortManager.SetPortRange(start, end) 733 } 734 735 // SetRouteTable assigns the route table to be used by this stack. It 736 // specifies which NIC to use for given destination address ranges. 737 // 738 // This method takes ownership of the table. 739 func (s *Stack) SetRouteTable(table []tcpip.Route) { 740 s.routeMu.Lock() 741 defer s.routeMu.Unlock() 742 s.routeTable = table 743 } 744 745 // GetRouteTable returns the route table which is currently in use. 746 func (s *Stack) GetRouteTable() []tcpip.Route { 747 s.routeMu.RLock() 748 defer s.routeMu.RUnlock() 749 return append([]tcpip.Route(nil), s.routeTable...) 750 } 751 752 // AddRoute appends a route to the route table. 753 func (s *Stack) AddRoute(route tcpip.Route) { 754 s.routeMu.Lock() 755 defer s.routeMu.Unlock() 756 s.routeTable = append(s.routeTable, route) 757 } 758 759 // RemoveRoutes removes matching routes from the route table. 
760 func (s *Stack) RemoveRoutes(match func(tcpip.Route) bool) { 761 s.routeMu.Lock() 762 defer s.routeMu.Unlock() 763 764 var filteredRoutes []tcpip.Route 765 for _, route := range s.routeTable { 766 if !match(route) { 767 filteredRoutes = append(filteredRoutes, route) 768 } 769 } 770 s.routeTable = filteredRoutes 771 } 772 773 // NewEndpoint creates a new transport layer endpoint of the given protocol. 774 func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { 775 t, ok := s.transportProtocols[transport] 776 if !ok { 777 return nil, &tcpip.ErrUnknownProtocol{} 778 } 779 780 return t.proto.NewEndpoint(network, waiterQueue) 781 } 782 783 // NewRawEndpoint creates a new raw transport layer endpoint of the given 784 // protocol. Raw endpoints receive all traffic for a given protocol regardless 785 // of address. 786 func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, tcpip.Error) { 787 if s.rawFactory == nil { 788 netRawMissingLogger.Infof("A process tried to create a raw socket, but --net-raw was not specified. Should runsc be run with --net-raw?") 789 return nil, &tcpip.ErrNotPermitted{} 790 } 791 792 if !associated { 793 return s.rawFactory.NewUnassociatedEndpoint(s, network, transport, waiterQueue) 794 } 795 796 t, ok := s.transportProtocols[transport] 797 if !ok { 798 return nil, &tcpip.ErrUnknownProtocol{} 799 } 800 801 return t.proto.NewRawEndpoint(network, waiterQueue) 802 } 803 804 // NewPacketEndpoint creates a new packet endpoint listening for the given 805 // netProto. 
806 func (s *Stack) NewPacketEndpoint(cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { 807 if s.rawFactory == nil { 808 return nil, &tcpip.ErrNotPermitted{} 809 } 810 811 return s.rawFactory.NewPacketEndpoint(s, cooked, netProto, waiterQueue) 812 } 813 814 // NICContext is an opaque pointer used to store client-supplied NIC metadata. 815 type NICContext any 816 817 // NICOptions specifies the configuration of a NIC as it is being created. 818 // The zero value creates an enabled, unnamed NIC. 819 type NICOptions struct { 820 // Name specifies the name of the NIC. 821 Name string 822 823 // Disabled specifies whether to avoid calling Attach on the passed 824 // LinkEndpoint. 825 Disabled bool 826 827 // Context specifies user-defined data that will be returned in stack.NICInfo 828 // for the NIC. Clients of this library can use it to add metadata that 829 // should be tracked alongside a NIC, to avoid having to keep a 830 // map[tcpip.NICID]metadata mirroring stack.Stack's nic map. 831 Context NICContext 832 833 // QDisc is the queue discipline to use for this NIC. 834 QDisc QueueingDiscipline 835 836 // DeliverLinkPackets specifies whether the NIC is responsible for 837 // delivering raw packets to packet sockets. 838 DeliverLinkPackets bool 839 } 840 841 // CreateNICWithOptions creates a NIC with the provided id, LinkEndpoint, and 842 // NICOptions. See the documentation on type NICOptions for details on how 843 // NICs can be configured. 844 // 845 // LinkEndpoint.Attach will be called to bind ep with a NetworkDispatcher. 846 func (s *Stack) CreateNICWithOptions(id tcpip.NICID, ep LinkEndpoint, opts NICOptions) tcpip.Error { 847 s.mu.Lock() 848 defer s.mu.Unlock() 849 850 // Make sure id is unique. 851 if _, ok := s.nics[id]; ok { 852 return &tcpip.ErrDuplicateNICID{} 853 } 854 855 // Make sure name is unique, unless unnamed. 
856 if opts.Name != "" { 857 for _, n := range s.nics { 858 if n.Name() == opts.Name { 859 return &tcpip.ErrDuplicateNICID{} 860 } 861 } 862 } 863 864 n := newNIC(s, id, ep, opts) 865 for proto := range s.defaultForwardingEnabled { 866 if _, err := n.setForwarding(proto, true); err != nil { 867 panic(fmt.Sprintf("newNIC(%d, ...).setForwarding(%d, true): %s", id, proto, err)) 868 } 869 } 870 s.nics[id] = n 871 if !opts.Disabled { 872 return n.enable() 873 } 874 875 return nil 876 } 877 878 // CreateNIC creates a NIC with the provided id and LinkEndpoint and calls 879 // LinkEndpoint.Attach to bind ep with a NetworkDispatcher. 880 func (s *Stack) CreateNIC(id tcpip.NICID, ep LinkEndpoint) tcpip.Error { 881 return s.CreateNICWithOptions(id, ep, NICOptions{}) 882 } 883 884 // GetLinkEndpointByName gets the link endpoint specified by name. 885 func (s *Stack) GetLinkEndpointByName(name string) LinkEndpoint { 886 s.mu.RLock() 887 defer s.mu.RUnlock() 888 for _, nic := range s.nics { 889 if nic.Name() == name { 890 linkEP, ok := nic.NetworkLinkEndpoint.(LinkEndpoint) 891 if !ok { 892 panic(fmt.Sprintf("unexpected NetworkLinkEndpoint(%#v) is not a LinkEndpoint", nic.NetworkLinkEndpoint)) 893 } 894 return linkEP 895 } 896 } 897 return nil 898 } 899 900 // EnableNIC enables the given NIC so that the link-layer endpoint can start 901 // delivering packets to it. 902 func (s *Stack) EnableNIC(id tcpip.NICID) tcpip.Error { 903 s.mu.RLock() 904 defer s.mu.RUnlock() 905 906 nic, ok := s.nics[id] 907 if !ok { 908 return &tcpip.ErrUnknownNICID{} 909 } 910 911 return nic.enable() 912 } 913 914 // DisableNIC disables the given NIC. 915 func (s *Stack) DisableNIC(id tcpip.NICID) tcpip.Error { 916 s.mu.RLock() 917 defer s.mu.RUnlock() 918 919 nic, ok := s.nics[id] 920 if !ok { 921 return &tcpip.ErrUnknownNICID{} 922 } 923 924 nic.disable() 925 return nil 926 } 927 928 // CheckNIC checks if a NIC is usable. 
929 func (s *Stack) CheckNIC(id tcpip.NICID) bool { 930 s.mu.RLock() 931 defer s.mu.RUnlock() 932 933 nic, ok := s.nics[id] 934 if !ok { 935 return false 936 } 937 938 return nic.Enabled() 939 } 940 941 // RemoveNIC removes NIC and all related routes from the network stack. 942 func (s *Stack) RemoveNIC(id tcpip.NICID) tcpip.Error { 943 s.mu.Lock() 944 defer s.mu.Unlock() 945 946 return s.removeNICLocked(id) 947 } 948 949 // removeNICLocked removes NIC and all related routes from the network stack. 950 // 951 // +checklocks:s.mu 952 func (s *Stack) removeNICLocked(id tcpip.NICID) tcpip.Error { 953 nic, ok := s.nics[id] 954 if !ok { 955 return &tcpip.ErrUnknownNICID{} 956 } 957 delete(s.nics, id) 958 959 // Remove routes in-place. n tracks the number of routes written. 960 s.routeMu.Lock() 961 n := 0 962 for _, r := range s.routeTable { 963 if r.NIC != id { 964 // Keep this route. 965 s.routeTable[n] = r 966 n++ 967 } 968 } 969 common.ClearArray(s.routeTable[n:]) 970 s.routeTable = s.routeTable[:n] 971 s.routeMu.Unlock() 972 973 return nic.remove() 974 } 975 976 // NICInfo captures the name and addresses assigned to a NIC. 977 type NICInfo struct { 978 Name string 979 LinkAddress tcpip.LinkAddress 980 ProtocolAddresses []tcpip.ProtocolAddress 981 982 // Flags indicate the state of the NIC. 983 Flags NICStateFlags 984 985 // MTU is the maximum transmission unit. 986 MTU uint32 987 988 Stats tcpip.NICStats 989 990 // NetworkStats holds the stats of each NetworkEndpoint bound to the NIC. 991 NetworkStats map[tcpip.NetworkProtocolNumber]NetworkEndpointStats 992 993 // Context is user-supplied data optionally supplied in CreateNICWithOptions. 994 // See type NICOptions for more details. 995 Context NICContext 996 997 // ARPHardwareType holds the ARP Hardware type of the NIC. This is the 998 // value sent in haType field of an ARP Request sent by this NIC and the 999 // value expected in the haType field of an ARP response. 
1000 ARPHardwareType header.ARPHardwareType 1001 1002 // Forwarding holds the forwarding status for each network endpoint that 1003 // supports forwarding. 1004 Forwarding map[tcpip.NetworkProtocolNumber]bool 1005 1006 // MulticastForwarding holds the forwarding status for each network endpoint 1007 // that supports multicast forwarding. 1008 MulticastForwarding map[tcpip.NetworkProtocolNumber]bool 1009 } 1010 1011 // HasNIC returns true if the NICID is defined in the stack. 1012 func (s *Stack) HasNIC(id tcpip.NICID) bool { 1013 s.mu.RLock() 1014 _, ok := s.nics[id] 1015 s.mu.RUnlock() 1016 return ok 1017 } 1018 1019 // NICInfo returns a map of NICIDs to their associated information. 1020 func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo { 1021 s.mu.RLock() 1022 defer s.mu.RUnlock() 1023 1024 type forwardingFn func(tcpip.NetworkProtocolNumber) (bool, tcpip.Error) 1025 forwardingValue := func(forwardingFn forwardingFn, proto tcpip.NetworkProtocolNumber, nicID tcpip.NICID, fnName string) (forward bool, ok bool) { 1026 switch forwarding, err := forwardingFn(proto); err.(type) { 1027 case nil: 1028 return forwarding, true 1029 case *tcpip.ErrUnknownProtocol: 1030 panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nicID)) 1031 case *tcpip.ErrNotSupported: 1032 // Not all network protocols support forwarding. 1033 default: 1034 panic(fmt.Sprintf("nic(id=%d).%s(%d): %s", nicID, fnName, proto, err)) 1035 } 1036 return false, false 1037 } 1038 1039 nics := make(map[tcpip.NICID]NICInfo) 1040 for id, nic := range s.nics { 1041 flags := NICStateFlags{ 1042 Up: true, // Netstack interfaces are always up. 
1043 Running: nic.Enabled(), 1044 Promiscuous: nic.Promiscuous(), 1045 Loopback: nic.IsLoopback(), 1046 } 1047 1048 netStats := make(map[tcpip.NetworkProtocolNumber]NetworkEndpointStats) 1049 for proto, netEP := range nic.networkEndpoints { 1050 netStats[proto] = netEP.Stats() 1051 } 1052 1053 info := NICInfo{ 1054 Name: nic.name, 1055 LinkAddress: nic.NetworkLinkEndpoint.LinkAddress(), 1056 ProtocolAddresses: nic.primaryAddresses(), 1057 Flags: flags, 1058 MTU: nic.NetworkLinkEndpoint.MTU(), 1059 Stats: nic.stats.local, 1060 NetworkStats: netStats, 1061 Context: nic.context, 1062 ARPHardwareType: nic.NetworkLinkEndpoint.ARPHardwareType(), 1063 Forwarding: make(map[tcpip.NetworkProtocolNumber]bool), 1064 MulticastForwarding: make(map[tcpip.NetworkProtocolNumber]bool), 1065 } 1066 1067 for proto := range s.networkProtocols { 1068 if forwarding, ok := forwardingValue(nic.forwarding, proto, id, "forwarding"); ok { 1069 info.Forwarding[proto] = forwarding 1070 } 1071 1072 if multicastForwarding, ok := forwardingValue(nic.multicastForwarding, proto, id, "multicastForwarding"); ok { 1073 info.MulticastForwarding[proto] = multicastForwarding 1074 } 1075 } 1076 1077 nics[id] = info 1078 } 1079 return nics 1080 } 1081 1082 // NICStateFlags holds information about the state of an NIC. 1083 type NICStateFlags struct { 1084 // Up indicates whether the interface is running. 1085 Up bool 1086 1087 // Running indicates whether resources are allocated. 1088 Running bool 1089 1090 // Promiscuous indicates whether the interface is in promiscuous mode. 1091 Promiscuous bool 1092 1093 // Loopback indicates whether the interface is a loopback. 1094 Loopback bool 1095 } 1096 1097 // AddProtocolAddress adds an address to the specified NIC, possibly with extra 1098 // properties. 
1099 func (s *Stack) AddProtocolAddress(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress, properties AddressProperties) tcpip.Error { 1100 s.mu.RLock() 1101 defer s.mu.RUnlock() 1102 1103 nic, ok := s.nics[id] 1104 if !ok { 1105 return &tcpip.ErrUnknownNICID{} 1106 } 1107 1108 return nic.addAddress(protocolAddress, properties) 1109 } 1110 1111 // RemoveAddress removes an existing network-layer address from the specified 1112 // NIC. 1113 func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) tcpip.Error { 1114 s.mu.RLock() 1115 defer s.mu.RUnlock() 1116 1117 if nic, ok := s.nics[id]; ok { 1118 return nic.removeAddress(addr) 1119 } 1120 1121 return &tcpip.ErrUnknownNICID{} 1122 } 1123 1124 // SetAddressLifetimes sets informational preferred and valid lifetimes, and 1125 // whether the address should be preferred or deprecated. 1126 func (s *Stack) SetAddressLifetimes(id tcpip.NICID, addr tcpip.Address, lifetimes AddressLifetimes) tcpip.Error { 1127 s.mu.RLock() 1128 defer s.mu.RUnlock() 1129 1130 if nic, ok := s.nics[id]; ok { 1131 return nic.setAddressLifetimes(addr, lifetimes) 1132 } 1133 1134 return &tcpip.ErrUnknownNICID{} 1135 } 1136 1137 // AllAddresses returns a map of NICIDs to their protocol addresses (primary 1138 // and non-primary). 1139 func (s *Stack) AllAddresses() map[tcpip.NICID][]tcpip.ProtocolAddress { 1140 s.mu.RLock() 1141 defer s.mu.RUnlock() 1142 1143 nics := make(map[tcpip.NICID][]tcpip.ProtocolAddress) 1144 for id, nic := range s.nics { 1145 nics[id] = nic.allPermanentAddresses() 1146 } 1147 return nics 1148 } 1149 1150 // GetMainNICAddress returns the first non-deprecated primary address and prefix 1151 // for the given NIC and protocol. If no non-deprecated primary addresses exist, 1152 // a deprecated address will be returned. If no deprecated addresses exist, the 1153 // zero value will be returned. 
1154 func (s *Stack) GetMainNICAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error) { 1155 s.mu.RLock() 1156 defer s.mu.RUnlock() 1157 1158 nic, ok := s.nics[id] 1159 if !ok { 1160 return tcpip.AddressWithPrefix{}, &tcpip.ErrUnknownNICID{} 1161 } 1162 1163 return nic.PrimaryAddress(protocol) 1164 } 1165 1166 func (s *Stack) getAddressEP(nic *nic, localAddr, remoteAddr, srcHint tcpip.Address, netProto tcpip.NetworkProtocolNumber) AssignableAddressEndpoint { 1167 if localAddr.BitLen() == 0 { 1168 return nic.primaryEndpoint(netProto, remoteAddr, srcHint) 1169 } 1170 return nic.findEndpoint(netProto, localAddr, CanBePrimaryEndpoint) 1171 } 1172 1173 // NewRouteForMulticast returns a Route that may be used to forward multicast 1174 // packets. 1175 // 1176 // Returns nil if validation fails. 1177 func (s *Stack) NewRouteForMulticast(nicID tcpip.NICID, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { 1178 s.mu.RLock() 1179 defer s.mu.RUnlock() 1180 1181 nic, ok := s.nics[nicID] 1182 if !ok || !nic.Enabled() { 1183 return nil 1184 } 1185 1186 if addressEndpoint := s.getAddressEP(nic, tcpip.Address{} /* localAddr */, remoteAddr, tcpip.Address{} /* srcHint */, netProto); addressEndpoint != nil { 1187 return constructAndValidateRoute(netProto, addressEndpoint, nic, nic, tcpip.Address{} /* gateway */, tcpip.Address{} /* localAddr */, remoteAddr, s.handleLocal, false /* multicastLoop */) 1188 } 1189 return nil 1190 } 1191 1192 // findLocalRouteFromNICRLocked is like findLocalRouteRLocked but finds a route 1193 // from the specified NIC. 
1194 // 1195 // +checklocksread:s.mu 1196 func (s *Stack) findLocalRouteFromNICRLocked(localAddressNIC *nic, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { 1197 localAddressEndpoint := localAddressNIC.getAddressOrCreateTempInner(netProto, localAddr, false /* createTemp */, NeverPrimaryEndpoint) 1198 if localAddressEndpoint == nil { 1199 return nil 1200 } 1201 1202 var outgoingNIC *nic 1203 // Prefer a local route to the same interface as the local address. 1204 if localAddressNIC.hasAddress(netProto, remoteAddr) { 1205 outgoingNIC = localAddressNIC 1206 } 1207 1208 // If the remote address isn't owned by the local address's NIC, check all 1209 // NICs. 1210 if outgoingNIC == nil { 1211 for _, nic := range s.nics { 1212 if nic.hasAddress(netProto, remoteAddr) { 1213 outgoingNIC = nic 1214 break 1215 } 1216 } 1217 } 1218 1219 // If the remote address is not owned by the stack, we can't return a local 1220 // route. 1221 if outgoingNIC == nil { 1222 localAddressEndpoint.DecRef() 1223 return nil 1224 } 1225 1226 r := makeLocalRoute( 1227 netProto, 1228 localAddr, 1229 remoteAddr, 1230 outgoingNIC, 1231 localAddressNIC, 1232 localAddressEndpoint, 1233 ) 1234 1235 if r.IsOutboundBroadcast() { 1236 r.Release() 1237 return nil 1238 } 1239 1240 return r 1241 } 1242 1243 // findLocalRouteRLocked returns a local route. 1244 // 1245 // A local route is a route to some remote address which the stack owns. That 1246 // is, a local route is a route where packets never have to leave the stack. 
1247 // 1248 // +checklocksread:s.mu 1249 func (s *Stack) findLocalRouteRLocked(localAddressNICID tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route { 1250 if localAddr.BitLen() == 0 { 1251 localAddr = remoteAddr 1252 } 1253 1254 if localAddressNICID == 0 { 1255 for _, localAddressNIC := range s.nics { 1256 if r := s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto); r != nil { 1257 return r 1258 } 1259 } 1260 1261 return nil 1262 } 1263 1264 if localAddressNIC, ok := s.nics[localAddressNICID]; ok { 1265 return s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto) 1266 } 1267 1268 return nil 1269 } 1270 1271 // HandleLocal returns true if non-loopback interfaces are allowed to loop packets. 1272 func (s *Stack) HandleLocal() bool { 1273 return s.handleLocal 1274 } 1275 1276 func isNICForwarding(nic *nic, proto tcpip.NetworkProtocolNumber) bool { 1277 switch forwarding, err := nic.forwarding(proto); err.(type) { 1278 case nil: 1279 return forwarding 1280 case *tcpip.ErrUnknownProtocol: 1281 panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nic.ID())) 1282 case *tcpip.ErrNotSupported: 1283 // Not all network protocols support forwarding. 1284 return false 1285 default: 1286 panic(fmt.Sprintf("nic(id=%d).forwarding(%d): %s", nic.ID(), proto, err)) 1287 } 1288 } 1289 1290 // findRouteWithLocalAddrFromAnyInterfaceRLocked returns a route to the given 1291 // destination address, leaving through the given NIC. 1292 // 1293 // Rather than preferring to find a route that uses a local address assigned to 1294 // the outgoing interface, it finds any NIC that holds a matching local address 1295 // endpoint. 
//
// +checklocksread:s.mu
func (s *Stack) findRouteWithLocalAddrFromAnyInterfaceRLocked(outgoingNIC *nic, localAddr, remoteAddr, srcHint, gateway tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) *Route {
	// Try every NIC as the holder of the local address; the first one that
	// yields a route passing validation wins.
	for _, aNIC := range s.nics {
		addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, srcHint, netProto)
		if addressEndpoint == nil {
			continue
		}

		if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, outgoingNIC, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
			return r
		}
	}
	return nil
}

// FindRoute creates a route to the given destination address, leaving through
// the given NIC and local address (if provided).
//
// If a NIC is not specified, the returned route will leave through the same
// NIC as the NIC that has the local address assigned when forwarding is
// disabled. If forwarding is enabled and the NIC is unspecified, the route may
// leave through any interface unless the route is link-local.
//
// If no local address is provided, the stack will select a local address. If no
// remote address is provided, the stack will use a remote address equal to the
// local address.
//
// It returns ErrUnknownProtocol when netProto is not enabled in the stack,
// ErrBadLocalAddress for an unroutable loopback destination, and
// ErrNetworkUnreachable or ErrHostUnreachable when no route could be built.
func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) (*Route, tcpip.Error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// Reject attempts to use unsupported protocols.
	if !s.CheckNetworkProtocol(netProto) {
		return nil, &tcpip.ErrUnknownProtocol{}
	}

	// Classify the destination. Broadcast, multicast, link-local and loopback
	// destinations do not need a route: for them the gateway is left
	// unspecified below.
	isLinkLocal := header.IsV6LinkLocalUnicastAddress(remoteAddr) || header.IsV6LinkLocalMulticastAddress(remoteAddr)
	isLocalBroadcast := remoteAddr == header.IPv4Broadcast
	isMulticast := header.IsV4MulticastAddress(remoteAddr) || header.IsV6MulticastAddress(remoteAddr)
	isLoopback := header.IsV4LoopbackAddress(remoteAddr) || header.IsV6LoopbackAddress(remoteAddr)
	needRoute := !(isLocalBroadcast || isMulticast || isLinkLocal || isLoopback)

	// When the stack handles local traffic itself, first try a route that
	// never leaves the stack.
	if s.handleLocal && !isMulticast && !isLocalBroadcast {
		if r := s.findLocalRouteRLocked(id, localAddr, remoteAddr, netProto); r != nil {
			return r, nil
		}
	}

	// If the interface is specified and we do not need a route, return a route
	// through the interface if the interface is valid and enabled.
	if id != 0 && !needRoute {
		if nic, ok := s.nics[id]; ok && nic.Enabled() {
			if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, tcpip.Address{} /* srcHint */, netProto); addressEndpoint != nil {
				return makeRoute(
					netProto,
					tcpip.Address{}, /* gateway */
					localAddr,
					remoteAddr,
					nic, /* outboundNIC */
					nic, /* localAddressNIC */
					addressEndpoint,
					s.handleLocal,
					multicastLoop,
				), nil
			}
		}

		if isLoopback {
			return nil, &tcpip.ErrBadLocalAddress{}
		}
		return nil, &tcpip.ErrNetworkUnreachable{}
	}

	// True when neither the local nor the remote address is link-local.
	onlyGlobalAddresses := !header.IsV6LinkLocalUnicastAddress(localAddr) && !isLinkLocal

	// Find a route to the remote with the route table. The closure scopes the
	// route table read lock to the table walk.
	var chosenRoute tcpip.Route
	if r := func() *Route {
		s.routeMu.RLock()
		defer s.routeMu.RUnlock()

		for _, route := range s.routeTable {
			// Skip entries whose destination does not cover the remote
			// address; an unspecified remote address matches every entry.
			if remoteAddr.BitLen() != 0 && !route.Destination.Contains(remoteAddr) {
				continue
			}

			// Skip entries that point at a missing or disabled NIC.
			nic, ok := s.nics[route.NIC]
			if !ok || !nic.Enabled() {
				continue
			}

			if id == 0 || id == route.NIC {
				if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, route.SourceHint, netProto); addressEndpoint != nil {
					var gateway tcpip.Address
					if needRoute {
						gateway = route.Gateway
					}
					r := constructAndValidateRoute(netProto, addressEndpoint, nic /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop)
					if r == nil {
						panic(fmt.Sprintf("non-forwarding route validation failed with route table entry = %#v, id = %d, localAddr = %s, remoteAddr = %s", route, id, localAddr, remoteAddr))
					}
					return r
				}
			}

			// If the stack has forwarding enabled, we haven't found a valid route to
			// the remote address yet, and we are routing locally generated traffic,
			// keep track of the first valid route. We keep iterating because we
			// prefer routes that let us use a local address that is assigned to the
			// outgoing interface. There is no requirement to do this from any RFC
			// but simply a choice made to better follow a strong host model which
			// the netstack follows at the time of writing.
			//
			// Note that for incoming traffic that we are forwarding (for which the
			// NIC and local address are unspecified), we do not keep iterating, as
			// there is no reason to prefer routes that let us use a local address
			// when routing forwarded (as opposed to locally-generated) traffic.
			locallyGenerated := (id != 0 || localAddr != tcpip.Address{})
			if onlyGlobalAddresses && chosenRoute.Equal(tcpip.Route{}) && isNICForwarding(nic, netProto) {
				if locallyGenerated {
					chosenRoute = route
					continue
				}
				if r := s.findRouteWithLocalAddrFromAnyInterfaceRLocked(nic, localAddr, remoteAddr, route.SourceHint, route.Gateway, netProto, multicastLoop); r != nil {
					return r
				}
			}
		}

		return nil
	}(); r != nil {
		return r, nil
	}

	if !chosenRoute.Equal(tcpip.Route{}) {
		// At this point we know the stack has forwarding enabled since chosenRoute is
		// only set when forwarding is enabled.
		nic, ok := s.nics[chosenRoute.NIC]
		if !ok {
			// If the route's NIC was invalid, we should not have chosen the route.
			panic(fmt.Sprintf("chosen route must have a valid NIC with ID = %d", chosenRoute.NIC))
		}

		var gateway tcpip.Address
		if needRoute {
			gateway = chosenRoute.Gateway
		}

		// Use the specified NIC to get the local address endpoint.
		if id != 0 {
			if aNIC, ok := s.nics[id]; ok {
				if addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, chosenRoute.SourceHint, netProto); addressEndpoint != nil {
					if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
						return r, nil
					}
				}
			}

			// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
			return nil, &tcpip.ErrHostUnreachable{}
		}

		// NOTE(review): the branch above always returns, so id is necessarily 0
		// here and this condition is always true.
		if id == 0 {
			// If an interface is not specified, try to find a NIC that holds the local
			// address endpoint to construct a route.
			if r := s.findRouteWithLocalAddrFromAnyInterfaceRLocked(nic, localAddr, remoteAddr, chosenRoute.SourceHint, gateway, netProto, multicastLoop); r != nil {
				return r, nil
			}
		}
	}

	if needRoute {
		// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
		return nil, &tcpip.ErrHostUnreachable{}
	}
	if header.IsV6LoopbackAddress(remoteAddr) {
		return nil, &tcpip.ErrBadLocalAddress{}
	}
	// NOTE(review): an earlier TODO here (https://gvisor.dev/issues/8105) asked
	// for ErrNetworkUnreachable, which is already what is returned.
	return nil, &tcpip.ErrNetworkUnreachable{}
}

// CheckNetworkProtocol checks if a given network protocol is enabled in the
// stack.
func (s *Stack) CheckNetworkProtocol(protocol tcpip.NetworkProtocolNumber) bool {
	_, ok := s.networkProtocols[protocol]
	return ok
}

// CheckDuplicateAddress performs duplicate address detection for the address on
// the specified interface.
//
// It returns ErrUnknownNICID when nicID does not name a NIC of the stack. The
// stack lock is released before the NIC is asked to perform the check.
func (s *Stack) CheckDuplicateAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, h DADCompletionHandler) (DADCheckAddressDisposition, tcpip.Error) {
	s.mu.RLock()
	nic, ok := s.nics[nicID]
	s.mu.RUnlock()

	if !ok {
		return 0, &tcpip.ErrUnknownNICID{}
	}

	return nic.checkDuplicateAddress(protocol, addr, h)
}

// CheckLocalAddress determines if the given local address exists, and if it
// does, returns the id of the NIC it's bound to. Returns 0 if the address
// does not exist.
//
// When nicID is non-zero only that NIC is considered; otherwise every NIC is
// searched.
func (s *Stack) CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// If a NIC is specified, use its NIC id.
	if nicID != 0 {
		nic, ok := s.nics[nicID]
		if !ok {
			return 0
		}
		// In IPv4, linux only checks the interface. If it matches, then it does
		// not bother with the address.
		// https://github.com/torvalds/linux/blob/15205c2829ca2cbb5ece5ceaafe1171a8470e62b/net/ipv4/igmp.c#L1829-L1837
		if protocol == header.IPv4ProtocolNumber {
			return nic.id
		}
		if nic.CheckLocalAddress(protocol, addr) {
			return nic.id
		}
		return 0
	}

	// Go through all the NICs.
	for _, nic := range s.nics {
		if nic.CheckLocalAddress(protocol, addr) {
			return nic.id
		}
	}

	return 0
}

// SetPromiscuousMode enables or disables promiscuous mode in the given NIC.
// It returns ErrUnknownNICID when nicID does not name a NIC of the stack.
func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) tcpip.Error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	nic, ok := s.nics[nicID]
	if !ok {
		return &tcpip.ErrUnknownNICID{}
	}

	nic.setPromiscuousMode(enable)

	return nil
}

// SetSpoofing enables or disables address spoofing in the given NIC, allowing
// endpoints to bind to any address in the NIC.
// It returns ErrUnknownNICID when nicID does not name a NIC of the stack.
func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) tcpip.Error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	nic, ok := s.nics[nicID]
	if !ok {
		return &tcpip.ErrUnknownNICID{}
	}

	nic.setSpoofing(enable)

	return nil
}

// LinkResolutionResult is the result of a link address resolution attempt.
type LinkResolutionResult struct {
	LinkAddress tcpip.LinkAddress
	Err         tcpip.Error
}

// GetLinkAddress finds the link address corresponding to a network address.
//
// Returns ErrNotSupported if the stack is not configured with a link address
// resolver for the specified network protocol.
//
// Returns ErrWouldBlock if the link address is not readily available, along
// with a notification channel for the caller to block on. Triggers address
// resolution asynchronously.
1579 // 1580 // onResolve will be called either immediately, if resolution is not required, 1581 // or when address resolution is complete, with the resolved link address and 1582 // whether resolution succeeded. 1583 // 1584 // If specified, the local address must be an address local to the interface 1585 // the neighbor cache belongs to. The local address is the source address of 1586 // a packet prompting NUD/link address resolution. 1587 func (s *Stack) GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, onResolve func(LinkResolutionResult)) tcpip.Error { 1588 s.mu.RLock() 1589 nic, ok := s.nics[nicID] 1590 s.mu.RUnlock() 1591 if !ok { 1592 return &tcpip.ErrUnknownNICID{} 1593 } 1594 1595 return nic.getLinkAddress(addr, localAddr, protocol, onResolve) 1596 } 1597 1598 // Neighbors returns all IP to MAC address associations. 1599 func (s *Stack) Neighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) ([]NeighborEntry, tcpip.Error) { 1600 s.mu.RLock() 1601 nic, ok := s.nics[nicID] 1602 s.mu.RUnlock() 1603 1604 if !ok { 1605 return nil, &tcpip.ErrUnknownNICID{} 1606 } 1607 1608 return nic.neighbors(protocol) 1609 } 1610 1611 // AddStaticNeighbor statically associates an IP address to a MAC address. 1612 func (s *Stack) AddStaticNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress) tcpip.Error { 1613 s.mu.RLock() 1614 nic, ok := s.nics[nicID] 1615 s.mu.RUnlock() 1616 1617 if !ok { 1618 return &tcpip.ErrUnknownNICID{} 1619 } 1620 1621 return nic.addStaticNeighbor(addr, protocol, linkAddr) 1622 } 1623 1624 // RemoveNeighbor removes an IP to MAC address association previously created 1625 // either automatically or by AddStaticNeighbor. Returns ErrBadAddress if there 1626 // is no association with the provided address. 
1627 func (s *Stack) RemoveNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error { 1628 s.mu.RLock() 1629 nic, ok := s.nics[nicID] 1630 s.mu.RUnlock() 1631 1632 if !ok { 1633 return &tcpip.ErrUnknownNICID{} 1634 } 1635 1636 return nic.removeNeighbor(protocol, addr) 1637 } 1638 1639 // ClearNeighbors removes all IP to MAC address associations. 1640 func (s *Stack) ClearNeighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) tcpip.Error { 1641 s.mu.RLock() 1642 nic, ok := s.nics[nicID] 1643 s.mu.RUnlock() 1644 1645 if !ok { 1646 return &tcpip.ErrUnknownNICID{} 1647 } 1648 1649 return nic.clearNeighbors(protocol) 1650 } 1651 1652 // RegisterTransportEndpoint registers the given endpoint with the stack 1653 // transport dispatcher. Received packets that match the provided id will be 1654 // delivered to the given endpoint; specifying a nic is optional, but 1655 // nic-specific IDs have precedence over global ones. 1656 func (s *Stack) RegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { 1657 return s.demux.registerEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) 1658 } 1659 1660 // CheckRegisterTransportEndpoint checks if an endpoint can be registered with 1661 // the stack transport dispatcher. 1662 func (s *Stack) CheckRegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error { 1663 return s.demux.checkEndpoint(netProtos, protocol, id, flags, bindToDevice) 1664 } 1665 1666 // UnregisterTransportEndpoint removes the endpoint with the given id from the 1667 // stack transport dispatcher. 
func (s *Stack) UnregisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) {
	s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice)
}

// StartTransportEndpointCleanup removes the endpoint with the given id from
// the stack transport dispatcher. It also transitions it to the cleanup stage.
//
// The endpoint is recorded in the cleanup set before it is unregistered from
// the dispatcher.
func (s *Stack) StartTransportEndpointCleanup(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) {
	s.cleanupEndpointsMu.Lock()
	s.cleanupEndpoints[ep] = struct{}{}
	s.cleanupEndpointsMu.Unlock()

	s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice)
}

// CompleteTransportEndpointCleanup removes the endpoint from the cleanup
// stage.
func (s *Stack) CompleteTransportEndpointCleanup(ep TransportEndpoint) {
	s.cleanupEndpointsMu.Lock()
	delete(s.cleanupEndpoints, ep)
	s.cleanupEndpointsMu.Unlock()
}

// FindTransportEndpoint finds an endpoint that most closely matches the provided
// id. If no endpoint is found it returns nil.
func (s *Stack) FindTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, nicID tcpip.NICID) TransportEndpoint {
	return s.demux.findTransportEndpoint(netProto, transProto, id, nicID)
}

// RegisterRawTransportEndpoint registers the given endpoint with the stack
// transport dispatcher. Received packets that match the provided transport
// protocol will be delivered to the given endpoint.
func (s *Stack) RegisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) tcpip.Error {
	return s.demux.registerRawEndpoint(netProto, transProto, ep)
}

// UnregisterRawTransportEndpoint removes the endpoint for the transport
// protocol from the stack transport dispatcher.
func (s *Stack) UnregisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) {
	s.demux.unregisterRawEndpoint(netProto, transProto, ep)
}

// RegisterRestoredEndpoint records e as an endpoint that has been restored on
// this stack. Recorded endpoints are consumed by Restore.
func (s *Stack) RegisterRestoredEndpoint(e RestoredEndpoint) {
	s.mu.Lock()
	defer s.mu.Unlock()

	s.restoredEndpoints = append(s.restoredEndpoints, e)
}

// RegisterResumableEndpoint records e as an endpoint that has to be resumed.
// Recorded endpoints are consumed by Resume.
func (s *Stack) RegisterResumableEndpoint(e ResumableEndpoint) {
	s.mu.Lock()
	defer s.mu.Unlock()

	s.resumableEndpoints = append(s.resumableEndpoints, e)
}

// RegisteredEndpoints returns all endpoints which are currently registered.
//
// The result is gathered from every per-protocol entry of the transport
// demultiplexer; a nil slice is returned when there are none.
func (s *Stack) RegisteredEndpoints() []TransportEndpoint {
	s.mu.Lock()
	defer s.mu.Unlock()

	var es []TransportEndpoint
	for _, e := range s.demux.protocol {
		es = append(es, e.transportEndpoints()...)
	}
	return es
}

// CleanupEndpoints returns endpoints currently in the cleanup state.
//
// The returned slice is a fresh copy of the cleanup set, in no particular
// order.
func (s *Stack) CleanupEndpoints() []TransportEndpoint {
	s.cleanupEndpointsMu.Lock()
	defer s.cleanupEndpointsMu.Unlock()

	es := make([]TransportEndpoint, 0, len(s.cleanupEndpoints))
	for e := range s.cleanupEndpoints {
		es = append(es, e)
	}
	return es
}

// RestoreCleanupEndpoints adds endpoints to cleanup tracking.
// This is useful for restoring a stack after a save.
func (s *Stack) RestoreCleanupEndpoints(es []TransportEndpoint) {
	s.cleanupEndpointsMu.Lock()
	defer s.cleanupEndpointsMu.Unlock()

	for _, e := range es {
		s.cleanupEndpoints[e] = struct{}{}
	}
}

// Close closes all currently registered transport endpoints.
//
// Registered endpoints are aborted first, then every transport protocol and
// every network protocol is closed.
//
// Endpoints created or modified during this call may not get closed.
func (s *Stack) Close() {
	for _, e := range s.RegisteredEndpoints() {
		e.Abort()
	}
	for _, p := range s.transportProtocols {
		p.proto.Close()
	}
	for _, p := range s.networkProtocols {
		p.Close()
	}
}

// Wait waits for all transport and link endpoints to halt their worker
// goroutines.
//
// Endpoints created or modified during this call may not get waited on.
//
// Note that link endpoints must be stopped via an implementation specific
// mechanism.
func (s *Stack) Wait() {
	for _, e := range s.RegisteredEndpoints() {
		e.Wait()
	}
	for _, e := range s.CleanupEndpoints() {
		e.Wait()
	}
	for _, p := range s.transportProtocols {
		p.proto.Wait()
	}
	for _, p := range s.networkProtocols {
		p.Wait()
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	// Every NIC is removed from the stack here, so no NICs remain registered
	// once Wait returns.
	for id, n := range s.nics {
		// Remove NIC to ensure that qDisc goroutines are correctly
		// terminated on stack teardown.
		// NOTE(review): the error returned by removeNICLocked is discarded.
		s.removeNICLocked(id)
		n.NetworkLinkEndpoint.Wait()
	}
}

// Destroy destroys the stack with all endpoints by calling Close followed by
// Wait.
func (s *Stack) Destroy() {
	s.Close()
	s.Wait()
}

// Pause pauses any protocol level background workers.
func (s *Stack) Pause() {
	for _, p := range s.transportProtocols {
		p.proto.Pause()
	}
}

// Restore restarts the stack after a restore. This must be called after the
// entire system has been restored.
1823 func (s *Stack) Restore() { 1824 // RestoredEndpoint.Restore() may call other methods on s, so we can't hold 1825 // s.mu while restoring the endpoints. 1826 s.mu.Lock() 1827 eps := s.restoredEndpoints 1828 s.restoredEndpoints = nil 1829 s.mu.Unlock() 1830 for _, e := range eps { 1831 e.Restore(s) 1832 } 1833 // Now resume any protocol level background workers. 1834 for _, p := range s.transportProtocols { 1835 p.proto.Resume() 1836 } 1837 } 1838 1839 // Resume resumes the stack after a save. 1840 func (s *Stack) Resume() { 1841 s.mu.Lock() 1842 eps := s.resumableEndpoints 1843 s.resumableEndpoints = nil 1844 s.mu.Unlock() 1845 for _, e := range eps { 1846 e.Resume() 1847 } 1848 // Now resume any protocol level background workers. 1849 for _, p := range s.transportProtocols { 1850 p.proto.Resume() 1851 } 1852 } 1853 1854 // RegisterPacketEndpoint registers ep with the stack, causing it to receive 1855 // all traffic of the specified netProto on the given NIC. If nicID is 0, it 1856 // receives traffic from every NIC. 1857 func (s *Stack) RegisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) tcpip.Error { 1858 s.mu.Lock() 1859 defer s.mu.Unlock() 1860 1861 // If no NIC is specified, capture on all devices. 1862 if nicID == 0 { 1863 // Register with each NIC. 1864 for _, nic := range s.nics { 1865 nic.registerPacketEndpoint(netProto, ep) 1866 } 1867 return nil 1868 } 1869 1870 // Capture on a specific device. 1871 nic, ok := s.nics[nicID] 1872 if !ok { 1873 return &tcpip.ErrUnknownNICID{} 1874 } 1875 nic.registerPacketEndpoint(netProto, ep) 1876 1877 return nil 1878 } 1879 1880 // UnregisterPacketEndpoint unregisters ep for packets of the specified 1881 // netProto from the specified NIC. If nicID is 0, ep is unregistered from all 1882 // NICs. 
1883 func (s *Stack) UnregisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) { 1884 s.mu.Lock() 1885 defer s.mu.Unlock() 1886 s.unregisterPacketEndpointLocked(nicID, netProto, ep) 1887 } 1888 1889 // +checklocks:s.mu 1890 func (s *Stack) unregisterPacketEndpointLocked(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) { 1891 // If no NIC is specified, unregister on all devices. 1892 if nicID == 0 { 1893 // Unregister with each NIC. 1894 for _, nic := range s.nics { 1895 nic.unregisterPacketEndpoint(netProto, ep) 1896 } 1897 return 1898 } 1899 1900 // Unregister in a single device. 1901 nic, ok := s.nics[nicID] 1902 if !ok { 1903 return 1904 } 1905 nic.unregisterPacketEndpoint(netProto, ep) 1906 } 1907 1908 // WritePacketToRemote writes a payload on the specified NIC using the provided 1909 // network protocol and remote link address. 1910 func (s *Stack) WritePacketToRemote(nicID tcpip.NICID, remote tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, payload buffer.Buffer) tcpip.Error { 1911 s.mu.Lock() 1912 nic, ok := s.nics[nicID] 1913 s.mu.Unlock() 1914 if !ok { 1915 return &tcpip.ErrUnknownDevice{} 1916 } 1917 pkt := NewPacketBuffer(PacketBufferOptions{ 1918 ReserveHeaderBytes: int(nic.MaxHeaderLength()), 1919 Payload: payload, 1920 }) 1921 defer pkt.DecRef() 1922 pkt.NetworkProtocolNumber = netProto 1923 return nic.WritePacketToRemote(remote, pkt) 1924 } 1925 1926 // WriteRawPacket writes data directly to the specified NIC without adding any 1927 // headers. 
1928 func (s *Stack) WriteRawPacket(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber, payload buffer.Buffer) tcpip.Error { 1929 s.mu.RLock() 1930 nic, ok := s.nics[nicID] 1931 s.mu.RUnlock() 1932 if !ok { 1933 return &tcpip.ErrUnknownNICID{} 1934 } 1935 1936 pkt := NewPacketBuffer(PacketBufferOptions{ 1937 Payload: payload, 1938 }) 1939 defer pkt.DecRef() 1940 pkt.NetworkProtocolNumber = proto 1941 return nic.writeRawPacketWithLinkHeaderInPayload(pkt) 1942 } 1943 1944 // NetworkProtocolInstance returns the protocol instance in the stack for the 1945 // specified network protocol. This method is public for protocol implementers 1946 // and tests to use. 1947 func (s *Stack) NetworkProtocolInstance(num tcpip.NetworkProtocolNumber) NetworkProtocol { 1948 if p, ok := s.networkProtocols[num]; ok { 1949 return p 1950 } 1951 return nil 1952 } 1953 1954 // TransportProtocolInstance returns the protocol instance in the stack for the 1955 // specified transport protocol. This method is public for protocol implementers 1956 // and tests to use. 1957 func (s *Stack) TransportProtocolInstance(num tcpip.TransportProtocolNumber) TransportProtocol { 1958 if pState, ok := s.transportProtocols[num]; ok { 1959 return pState.proto 1960 } 1961 return nil 1962 } 1963 1964 // AddTCPProbe installs a probe function that will be invoked on every segment 1965 // received by a given TCP endpoint. The probe function is passed a copy of the 1966 // TCP endpoint state before and after processing of the segment. 1967 // 1968 // NOTE: TCPProbe is added only to endpoints created after this call. Endpoints 1969 // created prior to this call will not call the probe function. 1970 // 1971 // Further, installing two different probes back to back can result in some 1972 // endpoints calling the first one and some the second one. There is no 1973 // guarantee provided on which probe will be invoked. Ideally this should only 1974 // be called once per stack. 
1975 func (s *Stack) AddTCPProbe(probe TCPProbeFunc) { 1976 s.tcpProbeFunc.Store(probe) 1977 } 1978 1979 // GetTCPProbe returns the TCPProbeFunc if installed with AddTCPProbe, nil 1980 // otherwise. 1981 func (s *Stack) GetTCPProbe() TCPProbeFunc { 1982 p := s.tcpProbeFunc.Load() 1983 if p == nil { 1984 return nil 1985 } 1986 return p.(TCPProbeFunc) 1987 } 1988 1989 // RemoveTCPProbe removes an installed TCP probe. 1990 // 1991 // NOTE: This only ensures that endpoints created after this call do not 1992 // have a probe attached. Endpoints already created will continue to invoke 1993 // TCP probe. 1994 func (s *Stack) RemoveTCPProbe() { 1995 // This must be TCPProbeFunc(nil) because atomic.Value.Store(nil) panics. 1996 s.tcpProbeFunc.Store(TCPProbeFunc(nil)) 1997 } 1998 1999 // JoinGroup joins the given multicast group on the given NIC. 2000 func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error { 2001 s.mu.RLock() 2002 defer s.mu.RUnlock() 2003 2004 if nic, ok := s.nics[nicID]; ok { 2005 return nic.joinGroup(protocol, multicastAddr) 2006 } 2007 return &tcpip.ErrUnknownNICID{} 2008 } 2009 2010 // LeaveGroup leaves the given multicast group on the given NIC. 2011 func (s *Stack) LeaveGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error { 2012 s.mu.RLock() 2013 defer s.mu.RUnlock() 2014 2015 if nic, ok := s.nics[nicID]; ok { 2016 return nic.leaveGroup(protocol, multicastAddr) 2017 } 2018 return &tcpip.ErrUnknownNICID{} 2019 } 2020 2021 // IsInGroup returns true if the NIC with ID nicID has joined the multicast 2022 // group multicastAddr. 
2023 func (s *Stack) IsInGroup(nicID tcpip.NICID, multicastAddr tcpip.Address) (bool, tcpip.Error) { 2024 s.mu.RLock() 2025 defer s.mu.RUnlock() 2026 2027 if nic, ok := s.nics[nicID]; ok { 2028 return nic.isInGroup(multicastAddr), nil 2029 } 2030 return false, &tcpip.ErrUnknownNICID{} 2031 } 2032 2033 // IPTables returns the stack's iptables. 2034 func (s *Stack) IPTables() *IPTables { 2035 return s.tables 2036 } 2037 2038 // ICMPLimit returns the maximum number of ICMP messages that can be sent 2039 // in one second. 2040 func (s *Stack) ICMPLimit() rate.Limit { 2041 return s.icmpRateLimiter.Limit() 2042 } 2043 2044 // SetICMPLimit sets the maximum number of ICMP messages that be sent 2045 // in one second. 2046 func (s *Stack) SetICMPLimit(newLimit rate.Limit) { 2047 s.icmpRateLimiter.SetLimit(newLimit) 2048 } 2049 2050 // ICMPBurst returns the maximum number of ICMP messages that can be sent 2051 // in a single burst. 2052 func (s *Stack) ICMPBurst() int { 2053 return s.icmpRateLimiter.Burst() 2054 } 2055 2056 // SetICMPBurst sets the maximum number of ICMP messages that can be sent 2057 // in a single burst. 2058 func (s *Stack) SetICMPBurst(burst int) { 2059 s.icmpRateLimiter.SetBurst(burst) 2060 } 2061 2062 // AllowICMPMessage returns true if we the rate limiter allows at least one 2063 // ICMP message to be sent at this instant. 2064 func (s *Stack) AllowICMPMessage() bool { 2065 return s.icmpRateLimiter.Allow() 2066 } 2067 2068 // GetNetworkEndpoint returns the NetworkEndpoint with the specified protocol 2069 // number installed on the specified NIC. 2070 func (s *Stack) GetNetworkEndpoint(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NetworkEndpoint, tcpip.Error) { 2071 s.mu.Lock() 2072 defer s.mu.Unlock() 2073 2074 nic, ok := s.nics[nicID] 2075 if !ok { 2076 return nil, &tcpip.ErrUnknownNICID{} 2077 } 2078 2079 return nic.getNetworkEndpoint(proto), nil 2080 } 2081 2082 // NUDConfigurations gets the per-interface NUD configurations. 
2083 func (s *Stack) NUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NUDConfigurations, tcpip.Error) { 2084 s.mu.RLock() 2085 nic, ok := s.nics[id] 2086 s.mu.RUnlock() 2087 2088 if !ok { 2089 return NUDConfigurations{}, &tcpip.ErrUnknownNICID{} 2090 } 2091 2092 return nic.nudConfigs(proto) 2093 } 2094 2095 // SetNUDConfigurations sets the per-interface NUD configurations. 2096 // 2097 // Note, if c contains invalid NUD configuration values, it will be fixed to 2098 // use default values for the erroneous values. 2099 func (s *Stack) SetNUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber, c NUDConfigurations) tcpip.Error { 2100 s.mu.RLock() 2101 nic, ok := s.nics[id] 2102 s.mu.RUnlock() 2103 2104 if !ok { 2105 return &tcpip.ErrUnknownNICID{} 2106 } 2107 2108 return nic.setNUDConfigs(proto, c) 2109 } 2110 2111 // Seed returns a 32 bit value that can be used as a seed value. 2112 // 2113 // NOTE: The seed is generated once during stack initialization only. 2114 func (s *Stack) Seed() uint32 { 2115 return s.seed 2116 } 2117 2118 // InsecureRNG returns a reference to a pseudo random generator that can be used 2119 // to generate random numbers as required. It is not cryptographically secure 2120 // and should not be used for security sensitive work. 2121 func (s *Stack) InsecureRNG() *rand.Rand { 2122 return s.insecureRNG 2123 } 2124 2125 // SecureRNG returns the stack's cryptographically secure random number 2126 // generator. 2127 func (s *Stack) SecureRNG() cryptorand.RNG { 2128 return s.secureRNG 2129 } 2130 2131 // FindNICNameFromID returns the name of the NIC for the given NICID. 2132 func (s *Stack) FindNICNameFromID(id tcpip.NICID) string { 2133 s.mu.RLock() 2134 defer s.mu.RUnlock() 2135 2136 nic, ok := s.nics[id] 2137 if !ok { 2138 return "" 2139 } 2140 2141 return nic.Name() 2142 } 2143 2144 // ParseResult indicates the result of a parsing attempt. 
2145 type ParseResult int 2146 2147 const ( 2148 // ParsedOK indicates that a packet was successfully parsed. 2149 ParsedOK ParseResult = iota 2150 2151 // UnknownTransportProtocol indicates that the transport protocol is unknown. 2152 UnknownTransportProtocol 2153 2154 // TransportLayerParseError indicates that the transport packet was not 2155 // successfully parsed. 2156 TransportLayerParseError 2157 ) 2158 2159 // ParsePacketBufferTransport parses the provided packet buffer's transport 2160 // header. 2161 func (s *Stack) ParsePacketBufferTransport(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) ParseResult { 2162 pkt.TransportProtocolNumber = protocol 2163 // Parse the transport header if present. 2164 state, ok := s.transportProtocols[protocol] 2165 if !ok { 2166 return UnknownTransportProtocol 2167 } 2168 2169 if !state.proto.Parse(pkt) { 2170 return TransportLayerParseError 2171 } 2172 2173 return ParsedOK 2174 } 2175 2176 // networkProtocolNumbers returns the network protocol numbers the stack is 2177 // configured with. 2178 func (s *Stack) networkProtocolNumbers() []tcpip.NetworkProtocolNumber { 2179 protos := make([]tcpip.NetworkProtocolNumber, 0, len(s.networkProtocols)) 2180 for p := range s.networkProtocols { 2181 protos = append(protos, p) 2182 } 2183 return protos 2184 } 2185 2186 func isSubnetBroadcastOnNIC(nic *nic, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool { 2187 addressEndpoint := nic.getAddressOrCreateTempInner(protocol, addr, false /* createTemp */, NeverPrimaryEndpoint) 2188 if addressEndpoint == nil { 2189 return false 2190 } 2191 2192 subnet := addressEndpoint.Subnet() 2193 addressEndpoint.DecRef() 2194 return subnet.IsBroadcast(addr) 2195 } 2196 2197 // IsSubnetBroadcast returns true if the provided address is a subnet-local 2198 // broadcast address on the specified NIC and protocol. 2199 // 2200 // Returns false if the NIC is unknown or if the protocol is unknown or does 2201 // not support addressing. 
2202 // 2203 // If the NIC is not specified, the stack will check all NICs. 2204 func (s *Stack) IsSubnetBroadcast(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool { 2205 s.mu.RLock() 2206 defer s.mu.RUnlock() 2207 2208 if nicID != 0 { 2209 nic, ok := s.nics[nicID] 2210 if !ok { 2211 return false 2212 } 2213 2214 return isSubnetBroadcastOnNIC(nic, protocol, addr) 2215 } 2216 2217 for _, nic := range s.nics { 2218 if isSubnetBroadcastOnNIC(nic, protocol, addr) { 2219 return true 2220 } 2221 } 2222 2223 return false 2224 } 2225 2226 // PacketEndpointWriteSupported returns true iff packet endpoints support write 2227 // operations. 2228 func (s *Stack) PacketEndpointWriteSupported() bool { 2229 return s.packetEndpointWriteSupported 2230 }