// Source listing: github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/stack/registration.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package stack

import (
	"fmt"
	"time"

	"github.com/SagerNet/gvisor/pkg/tcpip"
	"github.com/SagerNet/gvisor/pkg/tcpip/buffer"
	"github.com/SagerNet/gvisor/pkg/tcpip/header"
	"github.com/SagerNet/gvisor/pkg/waiter"
)

// NetworkEndpointID is the identifier of a network layer protocol endpoint.
// Currently the local address is sufficient because all supported protocols
// (i.e., IPv4 and IPv6) have different sizes for their addresses.
type NetworkEndpointID struct {
	// LocalAddress is the local address that identifies the endpoint.
	LocalAddress tcpip.Address
}

// TransportEndpointID is the identifier of a transport layer protocol endpoint.
//
// +stateify savable
type TransportEndpointID struct {
	// LocalPort is the local port associated with the endpoint.
	LocalPort uint16

	// LocalAddress is the local [network layer] address associated with
	// the endpoint.
	LocalAddress tcpip.Address

	// RemotePort is the remote port associated with the endpoint.
	RemotePort uint16

	// RemoteAddress is the remote [network layer] address associated with
	// the endpoint.
	RemoteAddress tcpip.Address
}

// NetworkPacketInfo holds information about a network layer packet.
type NetworkPacketInfo struct {
	// LocalAddressBroadcast is true if the packet's local address is a broadcast
	// address.
	LocalAddressBroadcast bool

	// IsForwardedPacket is true if the packet is being forwarded.
	IsForwardedPacket bool
}

// TransportErrorKind enumerates error types that are handled by the transport
// layer.
type TransportErrorKind int

const (
	// PacketTooBigTransportError indicates that a packet did not reach its
	// destination because a link on the path to the destination had an MTU that
	// was too small to carry the packet.
	PacketTooBigTransportError TransportErrorKind = iota

	// DestinationHostUnreachableTransportError indicates that the destination
	// host was unreachable.
	DestinationHostUnreachableTransportError

	// DestinationPortUnreachableTransportError indicates that a packet reached
	// the destination host, but the transport protocol was not active on the
	// destination port.
	DestinationPortUnreachableTransportError

	// DestinationNetworkUnreachableTransportError indicates that the destination
	// network was unreachable.
	DestinationNetworkUnreachableTransportError
)

// TransportError is a marker interface for errors that may be handled by the
// transport layer.
type TransportError interface {
	tcpip.SockErrorCause

	// Kind returns the type of the transport error.
	Kind() TransportErrorKind
}

// TransportEndpoint is the interface that needs to be implemented by transport
// protocol (e.g., tcp, udp) endpoints that can handle packets.
type TransportEndpoint interface {
	// UniqueID returns a unique ID for this transport endpoint.
	UniqueID() uint64

	// HandlePacket is called by the stack when new packets arrive to this
	// transport endpoint. It sets the packet buffer's transport header.
	//
	// HandlePacket takes ownership of the packet.
	HandlePacket(TransportEndpointID, *PacketBuffer)

	// HandleError is called when the transport endpoint receives an error.
	//
	// HandleError takes ownership of the packet buffer.
	HandleError(TransportError, *PacketBuffer)

	// Abort initiates an expedited endpoint teardown. It puts the endpoint
	// in a closed state and frees all resources associated with it. This
	// cleanup may happen asynchronously. Wait can be used to block on this
	// asynchronous cleanup.
	Abort()

	// Wait waits for any worker goroutines owned by the endpoint to stop.
	//
	// An endpoint can be requested to stop its worker goroutines by calling
	// its Close method.
	//
	// Wait will not block if the endpoint hasn't started any goroutines
	// yet, even if it might later.
	Wait()
}

// RawTransportEndpoint is the interface that needs to be implemented by raw
// transport protocol endpoints. RawTransportEndpoints receive the entire
// packet - including the network and transport headers - as delivered to
// netstack.
type RawTransportEndpoint interface {
	// HandlePacket is called by the stack when new packets arrive to
	// this transport endpoint. The packet contains all data from the link
	// layer up.
	//
	// HandlePacket takes ownership of the packet.
	HandlePacket(*PacketBuffer)
}

// PacketEndpoint is the interface that needs to be implemented by packet
// transport protocol endpoints. These endpoints receive link layer headers in
// addition to whatever they contain (usually network and transport layer
// headers and a payload).
type PacketEndpoint interface {
	// HandlePacket is called by the stack when new packets arrive that
	// match the endpoint.
	//
	// Implementers should treat packet as immutable and should copy it
	// before modification.
	//
	// linkHeader may have a length of 0, in which case the PacketEndpoint
	// should construct its own ethernet header for applications.
	//
	// NOTE(review): this signature has no parameter named linkHeader; the
	// sentence above presumably refers to the link header carried by pkt.
	// Confirm and reword.
	//
	// HandlePacket takes ownership of pkt.
	HandlePacket(nicID tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
}

// UnknownDestinationPacketDisposition enumerates the possible return values from
// HandleUnknownDestinationPacket().
type UnknownDestinationPacketDisposition int

const (
	// UnknownDestinationPacketMalformed denotes that the packet was malformed
	// and no further processing should be attempted other than updating
	// statistics.
	UnknownDestinationPacketMalformed UnknownDestinationPacketDisposition = iota

	// UnknownDestinationPacketUnhandled tells the caller that the packet was
	// well formed but that the issue was not handled and the stack should take
	// the default action.
	UnknownDestinationPacketUnhandled

	// UnknownDestinationPacketHandled tells the caller that it should do
	// no further processing.
	UnknownDestinationPacketHandled
)

// TransportProtocol is the interface that needs to be implemented by transport
// protocols (e.g., tcp, udp) that want to be part of the networking stack.
type TransportProtocol interface {
	// Number returns the transport protocol number.
	Number() tcpip.TransportProtocolNumber

	// NewEndpoint creates a new endpoint of the transport protocol.
	NewEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error)

	// NewRawEndpoint creates a new raw endpoint of the transport protocol.
	NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error)

	// MinimumPacketSize returns the minimum valid packet size of this
	// transport protocol. The stack automatically drops any packets smaller
	// than this targeted at this protocol.
	MinimumPacketSize() int

	// ParsePorts returns the source and destination ports stored in a
	// packet of this protocol.
	ParsePorts(v buffer.View) (src, dst uint16, err tcpip.Error)

	// HandleUnknownDestinationPacket handles packets targeted at this
	// protocol that don't match any existing endpoint. For example,
	// it is targeted at a port that has no listeners.
	//
	// HandleUnknownDestinationPacket takes ownership of the packet if it handles
	// the issue.
	HandleUnknownDestinationPacket(TransportEndpointID, *PacketBuffer) UnknownDestinationPacketDisposition

	// SetOption allows enabling/disabling protocol specific features.
	// SetOption returns an error if the option is not supported or the
	// provided option value is invalid.
	SetOption(option tcpip.SettableTransportProtocolOption) tcpip.Error

	// Option allows retrieving protocol specific option values.
	// Option returns an error if the option is not supported or the
	// provided option value is invalid.
	Option(option tcpip.GettableTransportProtocolOption) tcpip.Error

	// Close requests that any worker goroutines owned by the protocol
	// stop.
	Close()

	// Wait waits for any worker goroutines owned by the protocol to stop.
	Wait()

	// Parse sets pkt.TransportHeader and trims pkt.Data appropriately. It does
	// neither and returns false if pkt.Data is too small, i.e. pkt.Data.Size() <
	// MinimumPacketSize().
	Parse(pkt *PacketBuffer) (ok bool)
}

// TransportPacketDisposition is the result from attempting to deliver a packet
// to the transport layer.
type TransportPacketDisposition int

const (
	// TransportPacketHandled indicates that a transport packet was handled by the
	// transport layer and callers need not take any further action.
	TransportPacketHandled TransportPacketDisposition = iota

	// TransportPacketProtocolUnreachable indicates that the transport
	// protocol requested in the packet is not supported.
	TransportPacketProtocolUnreachable

	// TransportPacketDestinationPortUnreachable indicates that there weren't any
	// listeners interested in the packet and the transport protocol has no means
	// to notify the sender.
	TransportPacketDestinationPortUnreachable
)

// TransportDispatcher contains the methods used by the network stack to deliver
// packets to the appropriate transport endpoint after it has been handled by
// the network layer.
type TransportDispatcher interface {
	// DeliverTransportPacket delivers packets to the appropriate
	// transport protocol endpoint.
	//
	// pkt.NetworkHeader must be set before calling DeliverTransportPacket.
	//
	// DeliverTransportPacket takes ownership of the packet.
	DeliverTransportPacket(tcpip.TransportProtocolNumber, *PacketBuffer) TransportPacketDisposition

	// DeliverTransportError delivers an error to the appropriate transport
	// endpoint.
	//
	// DeliverTransportError takes ownership of the packet buffer.
	DeliverTransportError(local, remote tcpip.Address, _ tcpip.NetworkProtocolNumber, _ tcpip.TransportProtocolNumber, _ TransportError, _ *PacketBuffer)

	// DeliverRawPacket delivers a packet to any subscribed raw sockets.
	//
	// DeliverRawPacket does NOT take ownership of the packet buffer.
	DeliverRawPacket(tcpip.TransportProtocolNumber, *PacketBuffer)
}

// PacketLooping specifies where an outbound packet should be sent.
//
// The values are distinct bits (PacketOut == 1, PacketLoop == 2) because they
// are declared with 1 << iota.
// NOTE(review): presumably this is so both may be set on the same packet;
// confirm against callers.
type PacketLooping byte

const (
	// PacketOut indicates that the packet should be passed to the link
	// endpoint.
	PacketOut PacketLooping = 1 << iota

	// PacketLoop indicates that the packet should be handled locally.
	PacketLoop
)

// NetworkHeaderParams are the header parameters given as input by the
// transport endpoint to the network.
type NetworkHeaderParams struct {
	// Protocol refers to the transport protocol number.
	Protocol tcpip.TransportProtocolNumber

	// TTL refers to Time To Live field of the IP-header.
	TTL uint8

	// TOS refers to TypeOfService or TrafficClass field of the IP-header.
	TOS uint8
}

// GroupAddressableEndpoint is an endpoint that supports group addressing.
//
// An endpoint is considered to support group addressing when one or more
// endpoints may associate themselves with the same identifier (group address).
type GroupAddressableEndpoint interface {
	// JoinGroup joins the specified group.
	JoinGroup(group tcpip.Address) tcpip.Error

	// LeaveGroup attempts to leave the specified group.
	LeaveGroup(group tcpip.Address) tcpip.Error

	// IsInGroup returns true if the endpoint is a member of the specified group.
	IsInGroup(group tcpip.Address) bool
}

// PrimaryEndpointBehavior is an enumeration of an AddressEndpoint's primary
// behavior.
type PrimaryEndpointBehavior int

const (
	// CanBePrimaryEndpoint indicates the endpoint can be used as a primary
	// endpoint for new connections with no local address. This is the
	// default when calling NIC.AddAddress.
	CanBePrimaryEndpoint PrimaryEndpointBehavior = iota

	// FirstPrimaryEndpoint indicates the endpoint should be the first
	// primary endpoint considered. If there are multiple endpoints with
	// this behavior, they are ordered by recency.
	FirstPrimaryEndpoint

	// NeverPrimaryEndpoint indicates the endpoint should never be a
	// primary endpoint.
	NeverPrimaryEndpoint
)

// AddressConfigType is the method used to add an address.
type AddressConfigType int

const (
	// AddressConfigStatic is a statically configured address endpoint that was
	// added by some user-specified action (adding an explicit address, joining a
	// multicast group).
	AddressConfigStatic AddressConfigType = iota

	// AddressConfigSlaac is an address endpoint added by SLAAC, as per RFC 4862
	// section 5.5.3.
	AddressConfigSlaac

	// AddressConfigSlaacTemp is a temporary address endpoint added by SLAAC as
	// per RFC 4941. Temporary SLAAC addresses are short-lived and are not
	// meant to be valid (or preferred) forever; hence the term temporary.
	AddressConfigSlaacTemp
)

// AssignableAddressEndpoint is a reference counted address endpoint that may be
// assigned to a NetworkEndpoint.
type AssignableAddressEndpoint interface {
	// AddressWithPrefix returns the endpoint's address.
	AddressWithPrefix() tcpip.AddressWithPrefix

	// Subnet returns the subnet of the endpoint's address.
	Subnet() tcpip.Subnet

	// IsAssigned returns whether or not the endpoint is considered bound
	// to its NetworkEndpoint.
	//
	// NOTE(review): presumably allowExpired makes expired endpoints count as
	// assigned; confirm against the implementations.
	IsAssigned(allowExpired bool) bool

	// IncRef increments this endpoint's reference count.
	//
	// Returns true if it was successfully incremented. If it returns false, then
	// the endpoint is considered expired and should no longer be used.
	IncRef() bool

	// DecRef decrements this endpoint's reference count.
	DecRef()
}

// AddressEndpoint is an endpoint representing an address assigned to an
// AddressableEndpoint.
type AddressEndpoint interface {
	AssignableAddressEndpoint

	// GetKind returns the address kind for this endpoint.
	GetKind() AddressKind

	// SetKind sets the address kind for this endpoint.
	SetKind(AddressKind)

	// ConfigType returns the method used to add the address.
	ConfigType() AddressConfigType

	// Deprecated returns whether or not this endpoint is deprecated.
	Deprecated() bool

	// SetDeprecated sets this endpoint's deprecated status.
	SetDeprecated(bool)
}

// AddressKind is the kind of an address.
//
// See the values of AddressKind for more details.
type AddressKind int

const (
	// PermanentTentative is a permanent address endpoint that is not yet
	// considered to be fully bound to an interface in the traditional
	// sense. That is, the address is associated with a NIC, but packets
	// destined to the address MUST NOT be accepted and MUST be silently
	// dropped, and the address MUST NOT be used as a source address for
	// outgoing packets. For IPv6, addresses are of this kind until NDP's
	// Duplicate Address Detection (DAD) resolves. If DAD fails, the address
	// is removed.
	PermanentTentative AddressKind = iota

	// Permanent is a permanent endpoint (vs. a temporary one) assigned to the
	// NIC. Its reference count is biased by 1 to avoid removal when no route
	// holds a reference to it. It is removed by explicitly removing the address
	// from the NIC.
	Permanent

	// PermanentExpired is a permanent endpoint that had its address removed from
	// the NIC, and it is waiting to be removed once no references to it are held.
	//
	// If the address is re-added before the endpoint is removed, its type
	// changes back to Permanent.
	PermanentExpired

	// Temporary is an endpoint, created on a one-off basis to temporarily
	// consider the NIC bound to an address that it is not explicitly bound to
	// (such as a permanent address). Its reference count must not be biased by 1
	// so that the address is removed immediately when references to it are no
	// longer held.
	//
	// A temporary endpoint may be promoted to permanent if the address is added
	// permanently.
	Temporary
)

// IsPermanent returns true if the AddressKind represents a permanent address.
//
// Permanent and PermanentTentative report true; Temporary and
// PermanentExpired report false. Any other value is treated as a programming
// error and causes a panic.
func (k AddressKind) IsPermanent() bool {
	switch k {
	case Permanent, PermanentTentative:
		return true
	case Temporary, PermanentExpired:
		return false
	default:
		// Every declared AddressKind is handled above, so reaching this branch
		// means k holds a value outside the declared set.
		panic(fmt.Sprintf("unrecognized address kind = %d", k))
	}
}

// AddressableEndpoint is an endpoint that supports addressing.
//
// An endpoint is considered to support addressing when the endpoint may
// associate itself with an identifier (address).
type AddressableEndpoint interface {
	// AddAndAcquirePermanentAddress adds the passed permanent address.
	//
	// Returns *tcpip.ErrDuplicateAddress if the address exists.
	//
	// Acquires and returns the AddressEndpoint for the added address.
	AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb PrimaryEndpointBehavior, configType AddressConfigType, deprecated bool) (AddressEndpoint, tcpip.Error)

	// RemovePermanentAddress removes the passed address if it is a permanent
	// address.
	//
	// Returns *tcpip.ErrBadLocalAddress if the endpoint does not have the passed
	// permanent address.
	RemovePermanentAddress(addr tcpip.Address) tcpip.Error

	// MainAddress returns the endpoint's primary permanent address.
	MainAddress() tcpip.AddressWithPrefix

	// AcquireAssignedAddress returns an address endpoint for the passed address
	// that is considered bound to the endpoint, optionally creating a temporary
	// endpoint if requested and no existing address exists.
	//
	// The returned endpoint's reference count is incremented.
	//
	// Returns nil if the specified address is not local to this endpoint.
	AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB PrimaryEndpointBehavior) AddressEndpoint

	// AcquireOutgoingPrimaryAddress returns a primary address that may be used as
	// a source address when sending packets to the passed remote address.
	//
	// If allowExpired is true, expired addresses may be returned.
	//
	// The returned endpoint's reference count is incremented.
	//
	// Returns nil if a primary address is not available.
	AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) AddressEndpoint

	// PrimaryAddresses returns the primary addresses.
	PrimaryAddresses() []tcpip.AddressWithPrefix

	// PermanentAddresses returns all the permanent addresses.
	PermanentAddresses() []tcpip.AddressWithPrefix
}

// NDPEndpoint is a network endpoint that supports NDP.
type NDPEndpoint interface {
	NetworkEndpoint

	// InvalidateDefaultRouter invalidates a default router discovered through
	// NDP.
	InvalidateDefaultRouter(tcpip.Address)
}

// NetworkInterface is a network interface.
type NetworkInterface interface {
	NetworkLinkEndpoint

	// ID returns the interface's ID.
	ID() tcpip.NICID

	// IsLoopback returns true if the interface is a loopback interface.
	IsLoopback() bool

	// Name returns the name of the interface.
	//
	// May return an empty string if the interface is not configured with a name.
	Name() string

	// Enabled returns true if the interface is enabled.
	Enabled() bool

	// Promiscuous returns true if the interface is in promiscuous mode.
	//
	// When in promiscuous mode, the interface should accept all packets.
	Promiscuous() bool

	// Spoofing returns true if the interface is in spoofing mode.
	//
	// When in spoofing mode, the interface should consider all addresses as
	// assigned to it.
	Spoofing() bool

	// PrimaryAddress returns the primary address associated with the interface.
	//
	// PrimaryAddress will return the first non-deprecated address if such an
	// address exists. If no non-deprecated addresses exist, the first deprecated
	// address will be returned. If no deprecated addresses exist, the zero value
	// will be returned.
	PrimaryAddress(tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error)

	// CheckLocalAddress returns true if the address exists on the interface.
	CheckLocalAddress(tcpip.NetworkProtocolNumber, tcpip.Address) bool

	// WritePacketToRemote writes the packet to the given remote link address.
	WritePacketToRemote(tcpip.LinkAddress, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error

	// WritePacket writes a packet with the given protocol through the given
	// route.
	//
	// WritePacket takes ownership of the packet buffer. The packet buffer's
	// network and transport header must be set.
	WritePacket(*Route, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error

	// WritePackets writes packets with the given protocol through the given
	// route. Must not be called with an empty list of packet buffers.
	//
	// WritePackets takes ownership of the packet buffers.
	//
	// Right now, WritePackets is used only when the software segmentation
	// offload is enabled. If it will be used for something else, syscall filters
	// may need to be updated.
	WritePackets(*Route, PacketBufferList, tcpip.NetworkProtocolNumber) (int, tcpip.Error)

	// HandleNeighborProbe processes an incoming neighbor probe (e.g. ARP
	// request or NDP Neighbor Solicitation).
	//
	// HandleNeighborProbe assumes that the probe is valid for the network
	// interface the probe was received on.
	HandleNeighborProbe(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress) tcpip.Error

	// HandleNeighborConfirmation processes an incoming neighbor confirmation
	// (e.g. ARP reply or NDP Neighbor Advertisement).
	HandleNeighborConfirmation(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress, ReachabilityConfirmationFlags) tcpip.Error
}

// LinkResolvableNetworkEndpoint handles link resolution events.
type LinkResolvableNetworkEndpoint interface {
	// HandleLinkResolutionFailure is called when link resolution prevents the
	// argument from having been sent.
	HandleLinkResolutionFailure(*PacketBuffer)
}

// NetworkEndpoint is the interface that needs to be implemented by endpoints
// of network layer protocols (e.g., ipv4, ipv6).
type NetworkEndpoint interface {
	// Enable enables the endpoint.
	//
	// Must only be called when the stack is in a state that allows the endpoint
	// to send and receive packets.
	//
	// Returns *tcpip.ErrNotPermitted if the endpoint cannot be enabled.
	Enable() tcpip.Error

	// Enabled returns true if the endpoint is enabled.
	Enabled() bool

	// Disable disables the endpoint.
	Disable()

	// DefaultTTL is the default time-to-live value (or hop limit, in ipv6)
	// for this endpoint.
	DefaultTTL() uint8

	// MTU is the maximum transmission unit for this endpoint. This is
	// generally calculated as the MTU of the underlying data link endpoint
	// minus the network endpoint max header length.
	MTU() uint32

	// MaxHeaderLength returns the maximum size the network (and lower
	// level layers combined) headers can have. Higher levels use this
	// information to reserve space in the front of the packets they're
	// building.
	MaxHeaderLength() uint16

	// WritePacket writes a packet to the given destination address and
	// protocol. It takes ownership of pkt. pkt.TransportHeader must have
	// already been set.
	WritePacket(r *Route, params NetworkHeaderParams, pkt *PacketBuffer) tcpip.Error

	// WritePackets writes packets to the given destination address and
	// protocol. pkts must not be zero length. It takes ownership of pkts and
	// underlying packets.
	WritePackets(r *Route, pkts PacketBufferList, params NetworkHeaderParams) (int, tcpip.Error)

	// WriteHeaderIncludedPacket writes a packet that includes a network
	// header to the given destination address. It takes ownership of pkt.
	WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) tcpip.Error

	// HandlePacket is called by the link layer when new packets arrive to
	// this network endpoint. It sets pkt.NetworkHeader.
	//
	// HandlePacket takes ownership of pkt.
	HandlePacket(pkt *PacketBuffer)

	// Close is called when the endpoint is removed from a stack.
	Close()

	// NetworkProtocolNumber returns the tcpip.NetworkProtocolNumber for
	// this endpoint.
	NetworkProtocolNumber() tcpip.NetworkProtocolNumber

	// Stats returns a reference to the network endpoint stats.
	Stats() NetworkEndpointStats
}

// NetworkEndpointStats is the interface implemented by each network endpoint
// stats struct.
type NetworkEndpointStats interface {
	// IsNetworkEndpointStats is an empty method to implement the
	// NetworkEndpointStats marker interface.
	IsNetworkEndpointStats()
}

// IPNetworkEndpointStats is a NetworkEndpointStats that tracks IP-related
// statistics.
type IPNetworkEndpointStats interface {
	NetworkEndpointStats

	// IPStats returns the IP statistics of a network endpoint.
	IPStats() *tcpip.IPStats
}

// ForwardingNetworkEndpoint is a network endpoint that may forward packets.
type ForwardingNetworkEndpoint interface {
	NetworkEndpoint

	// Forwarding returns the forwarding configuration.
	Forwarding() bool

	// SetForwarding sets the forwarding configuration.
	SetForwarding(bool)
}

// NetworkProtocol is the interface that needs to be implemented by network
// protocols (e.g., ipv4, ipv6) that want to be part of the networking stack.
type NetworkProtocol interface {
	// Number returns the network protocol number.
	Number() tcpip.NetworkProtocolNumber

	// MinimumPacketSize returns the minimum valid packet size of this
	// network protocol. The stack automatically drops any packets smaller
	// than this targeted at this protocol.
	MinimumPacketSize() int

	// DefaultPrefixLen returns the protocol's default prefix length.
	DefaultPrefixLen() int

	// ParseAddresses returns the source and destination addresses stored in a
	// packet of this protocol.
	ParseAddresses(v buffer.View) (src, dst tcpip.Address)

	// NewEndpoint creates a new endpoint of this protocol.
	NewEndpoint(nic NetworkInterface, dispatcher TransportDispatcher) NetworkEndpoint

	// SetOption allows enabling/disabling protocol specific features.
	// SetOption returns an error if the option is not supported or the
	// provided option value is invalid.
	SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error

	// Option allows retrieving protocol specific option values.
	// Option returns an error if the option is not supported or the
	// provided option value is invalid.
	Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error

	// Close requests that any worker goroutines owned by the protocol
	// stop.
	Close()

	// Wait waits for any worker goroutines owned by the protocol to stop.
	Wait()

	// Parse sets pkt.NetworkHeader and trims pkt.Data appropriately. It
	// returns:
	//   - The encapsulated protocol, if present.
	//   - Whether there is an encapsulated transport protocol payload (e.g. ARP
	//     does not encapsulate anything).
	//   - Whether pkt.Data was large enough to parse and set pkt.NetworkHeader.
	Parse(pkt *PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool)
}

// NetworkDispatcher contains the methods used by the network stack to deliver
// inbound/outbound packets to the appropriate network/packet(if any) endpoints.
type NetworkDispatcher interface {
	// DeliverNetworkPacket finds the appropriate network protocol endpoint
	// and hands the packet over for further processing.
	//
	// pkt.LinkHeader may or may not be set before calling
	// DeliverNetworkPacket. Some packets do not have link headers (e.g.
	// packets sent via loopback), and won't have the field set.
	//
	// DeliverNetworkPacket takes ownership of pkt.
	DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)

	// DeliverOutboundPacket is called by link layer when a packet is being
	// sent out.
	//
	// pkt.LinkHeader may or may not be set before calling
	// DeliverOutboundPacket. Some packets do not have link headers (e.g.
	// packets sent via loopback), and won't have the field set.
	//
	// DeliverOutboundPacket takes ownership of pkt.
	DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
}

// LinkEndpointCapabilities is the type associated with the capabilities
// supported by a link-layer endpoint. It is a set of bitfields.
type LinkEndpointCapabilities uint

// The following are the supported link endpoint capabilities.
//
// iota is already 1 on the CapabilityTXChecksumOffload line (CapabilityNone
// occupies index 0 of the block), so the flags take the values 2, 4, 8, 16,
// 32 and 64 in declaration order; bit 0 is not used by any capability.
const (
	// CapabilityNone is the empty capability set (no bits set).
	CapabilityNone LinkEndpointCapabilities = 0
	// CapabilityTXChecksumOffload indicates that the link endpoint supports
	// checksum computation for outgoing packets and the stack can skip
	// computing checksums when sending packets.
	CapabilityTXChecksumOffload LinkEndpointCapabilities = 1 << iota
	// CapabilityRXChecksumOffload indicates that the link endpoint supports
	// checksum verification on received packets and that it's safe for the
	// stack to skip checksum verification.
	CapabilityRXChecksumOffload
	// NOTE(review): the four capabilities below carry no documentation in this
	// file; their meaning must be taken from the code that tests them.
	CapabilityResolutionRequired
	CapabilitySaveRestore
	CapabilityDisconnectOk
	CapabilityLoopback
)

// NetworkLinkEndpoint is a data-link layer that supports sending network
// layer packets.
type NetworkLinkEndpoint interface {
	// MTU is the maximum transmission unit for this endpoint. This is
	// usually dictated by the backing physical network; when such a
	// physical network doesn't exist, the limit is generally 64k, which
	// includes the maximum size of an IP packet.
	MTU() uint32

	// MaxHeaderLength returns the maximum size the data link (and
	// lower level layers combined) headers can have. Higher levels use this
	// information to reserve space in the front of the packets they're
	// building.
	MaxHeaderLength() uint16

	// LinkAddress returns the link address (typically a MAC) of the
	// endpoint.
	LinkAddress() tcpip.LinkAddress
}

// LinkEndpoint is the interface implemented by data link layer protocols (e.g.,
// ethernet, loopback, raw) and used by network layer protocols to send packets
// out through the implementer's data link endpoint. When a link header exists,
// it sets each PacketBuffer's LinkHeader field before passing it up the
// stack.
type LinkEndpoint interface {
	NetworkLinkEndpoint

	// Capabilities returns the set of capabilities supported by the
	// endpoint.
	Capabilities() LinkEndpointCapabilities

	// Attach attaches the data link layer endpoint to the network-layer
	// dispatcher of the stack.
	//
	// Attach is called with a nil dispatcher when the endpoint's NIC is being
	// removed.
	Attach(dispatcher NetworkDispatcher)

	// IsAttached returns whether a NetworkDispatcher is attached to the
	// endpoint.
	IsAttached() bool

	// Wait waits for any worker goroutines owned by the endpoint to stop.
	//
	// For now, requesting that an endpoint's worker goroutine(s) stop is
	// implementation specific.
	//
	// Wait will not block if the endpoint hasn't started any goroutines
	// yet, even if it might later.
	Wait()

	// ARPHardwareType returns the ARPHRD_TYPE of the link endpoint.
	//
	// See:
	// https://github.com/torvalds/linux/blob/aa0c9086b40c17a7ad94425b3b70dd1fdd7497bf/include/uapi/linux/if_arp.h#L30
	ARPHardwareType() header.ARPHardwareType

	// AddHeader adds a link layer header to pkt if required.
	AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)

	// WritePacket writes a packet with the given protocol and route.
	//
	// WritePacket takes ownership of the packet buffer. The packet buffer's
	// network and transport header must be set.
	//
	// To participate in transparent bridging, a LinkEndpoint implementation
	// should call eth.Encode with header.EthernetFields.SrcAddr set to
	// r.LocalLinkAddress if it is provided.
	WritePacket(RouteInfo, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error

	// WritePackets writes packets with the given protocol and route. Must not be
	// called with an empty list of packet buffers.
	//
	// WritePackets takes ownership of the packet buffers.
	//
	// Right now, WritePackets is used only when the software segmentation
	// offload is enabled. If it will be used for something else, syscall filters
	// may need to be updated.
	WritePackets(RouteInfo, PacketBufferList, tcpip.NetworkProtocolNumber) (int, tcpip.Error)
}

// InjectableLinkEndpoint is a LinkEndpoint where inbound packets are
// delivered via the InjectInbound method.
type InjectableLinkEndpoint interface {
	LinkEndpoint

	// InjectInbound injects an inbound packet.
	InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)

	// InjectOutbound writes a fully formed outbound packet directly to the
	// link.
	//
	// dest is used by endpoints with multiple raw destinations.
	InjectOutbound(dest tcpip.Address, packet []byte) tcpip.Error
}

// DADResult is a marker interface for the result of a duplicate address
// detection process.
//
// Its only method is unexported, so only types declared in this package can
// implement it.
type DADResult interface {
	isDADResult()
}

// Compile-time check that *DADSucceeded implements DADResult.
var _ DADResult = (*DADSucceeded)(nil)

// DADSucceeded indicates DAD completed without finding any duplicate addresses.
type DADSucceeded struct{}

// isDADResult marks *DADSucceeded as a DADResult.
func (*DADSucceeded) isDADResult() {}

// Compile-time check that *DADError implements DADResult.
var _ DADResult = (*DADError)(nil)

// DADError indicates DAD hit an error.
type DADError struct {
	// Err is the error that DAD hit.
	Err tcpip.Error
}

// isDADResult marks *DADError as a DADResult.
func (*DADError) isDADResult() {}

// Compile-time check that *DADAborted implements DADResult.
var _ DADResult = (*DADAborted)(nil)

// DADAborted indicates DAD was aborted.
type DADAborted struct{}

// isDADResult marks *DADAborted as a DADResult.
func (*DADAborted) isDADResult() {}

// Compile-time check that *DADDupAddrDetected implements DADResult.
var _ DADResult = (*DADDupAddrDetected)(nil)

// DADDupAddrDetected indicates DAD detected a duplicate address.
type DADDupAddrDetected struct {
	// HolderLinkAddress is the link address of the node that holds the duplicate
	// address.
	HolderLinkAddress tcpip.LinkAddress
}

// isDADResult marks *DADDupAddrDetected as a DADResult.
func (*DADDupAddrDetected) isDADResult() {}

// DADCompletionHandler is a handler for DAD completion.
type DADCompletionHandler func(DADResult)

// DADCheckAddressDisposition enumerates the possible return values from
// DAD.CheckDuplicateAddress.
911 type DADCheckAddressDisposition int 912 913 const ( 914 _ DADCheckAddressDisposition = iota 915 916 // DADDisabled indicates that DAD is disabled. 917 DADDisabled 918 919 // DADStarting indicates that DAD is starting for an address. 920 DADStarting 921 922 // DADAlreadyRunning indicates that DAD was already started for an address. 923 DADAlreadyRunning 924 ) 925 926 const ( 927 // defaultDupAddrDetectTransmits is the default number of NDP Neighbor 928 // Solicitation messages to send when doing Duplicate Address Detection 929 // for a tentative address. 930 // 931 // Default = 1 (from RFC 4862 section 5.1) 932 defaultDupAddrDetectTransmits = 1 933 ) 934 935 // DADConfigurations holds configurations for duplicate address detection. 936 type DADConfigurations struct { 937 // The number of Neighbor Solicitation messages to send when doing 938 // Duplicate Address Detection for a tentative address. 939 // 940 // Note, a value of zero effectively disables DAD. 941 DupAddrDetectTransmits uint8 942 943 // The amount of time to wait between sending Neighbor Solicitation 944 // messages. 945 // 946 // Must be greater than or equal to 1ms. 947 RetransmitTimer time.Duration 948 } 949 950 // DefaultDADConfigurations returns the default DAD configurations. 951 func DefaultDADConfigurations() DADConfigurations { 952 return DADConfigurations{ 953 DupAddrDetectTransmits: defaultDupAddrDetectTransmits, 954 RetransmitTimer: defaultRetransmitTimer, 955 } 956 } 957 958 // Validate modifies the configuration with valid values. If invalid values are 959 // present in the configurations, the corresponding default values are used 960 // instead. 961 func (c *DADConfigurations) Validate() { 962 if c.RetransmitTimer < minimumRetransmitTimer { 963 c.RetransmitTimer = defaultRetransmitTimer 964 } 965 } 966 967 // DuplicateAddressDetector handles checking if an address is already assigned 968 // to some neighboring node on the link. 
969 type DuplicateAddressDetector interface { 970 // CheckDuplicateAddress checks if an address is assigned to a neighbor. 971 // 972 // If DAD is already being performed for the address, the handler will be 973 // called with the result of the original DAD request. 974 CheckDuplicateAddress(tcpip.Address, DADCompletionHandler) DADCheckAddressDisposition 975 976 // SetDADConfigurations sets the configurations for DAD. 977 SetDADConfigurations(c DADConfigurations) 978 979 // DuplicateAddressProtocol returns the network protocol the receiver can 980 // perform duplicate address detection for. 981 DuplicateAddressProtocol() tcpip.NetworkProtocolNumber 982 } 983 984 // LinkAddressResolver handles link address resolution for a network protocol. 985 type LinkAddressResolver interface { 986 // LinkAddressRequest sends a request for the link address of the target 987 // address. The request is broadcast on the local network if a remote link 988 // address is not provided. 989 LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) tcpip.Error 990 991 // ResolveStaticAddress attempts to resolve address without sending 992 // requests. It either resolves the name immediately or returns the 993 // empty LinkAddress. 994 // 995 // It can be used to resolve broadcast addresses for example. 996 ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) 997 998 // LinkAddressProtocol returns the network protocol of the 999 // addresses this resolver can resolve. 1000 LinkAddressProtocol() tcpip.NetworkProtocolNumber 1001 } 1002 1003 // RawFactory produces endpoints for writing various types of raw packets. 1004 type RawFactory interface { 1005 // NewUnassociatedEndpoint produces endpoints for writing packets not 1006 // associated with a particular transport protocol. Such endpoints can 1007 // be used to write arbitrary packets that include the network header. 
1008 NewUnassociatedEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) 1009 1010 // NewPacketEndpoint produces endpoints for reading and writing packets 1011 // that include network and (when cooked is false) link layer headers. 1012 NewPacketEndpoint(stack *Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) 1013 } 1014 1015 // GSOType is the type of GSO segments. 1016 // 1017 // +stateify savable 1018 type GSOType int 1019 1020 // Types of gso segments. 1021 const ( 1022 GSONone GSOType = iota 1023 1024 // Hardware GSO types: 1025 GSOTCPv4 1026 GSOTCPv6 1027 1028 // GSOSW is used for software GSO segments which have to be sent by 1029 // endpoint.WritePackets. 1030 GSOSW 1031 ) 1032 1033 // GSO contains generic segmentation offload properties. 1034 // 1035 // +stateify savable 1036 type GSO struct { 1037 // Type is one of GSONone, GSOTCPv4, etc. 1038 Type GSOType 1039 // NeedsCsum is set if the checksum offload is enabled. 1040 NeedsCsum bool 1041 // CsumOffset is offset after that to place checksum. 1042 CsumOffset uint16 1043 1044 // Mss is maximum segment size. 1045 MSS uint16 1046 // L3Len is L3 (IP) header length. 1047 L3HdrLen uint16 1048 1049 // MaxSize is maximum GSO packet size. 1050 MaxSize uint32 1051 } 1052 1053 // SupportedGSO returns the type of segmentation offloading supported. 1054 type SupportedGSO int 1055 1056 const ( 1057 // GSONotSupported indicates that segmentation offloading is not supported. 1058 GSONotSupported SupportedGSO = iota 1059 1060 // HWGSOSupported indicates that segmentation offloading may be performed by 1061 // the hardware. 1062 HWGSOSupported 1063 1064 // SWGSOSupported indicates that segmentation offloading may be performed in 1065 // software. 1066 SWGSOSupported 1067 ) 1068 1069 // GSOEndpoint provides access to GSO properties. 
1070 type GSOEndpoint interface { 1071 // GSOMaxSize returns the maximum GSO packet size. 1072 GSOMaxSize() uint32 1073 1074 // SupportedGSO returns the supported segmentation offloading. 1075 SupportedGSO() SupportedGSO 1076 } 1077 1078 // SoftwareGSOMaxSize is a maximum allowed size of a software GSO segment. 1079 // This isn't a hard limit, because it is never set into packet headers. 1080 const SoftwareGSOMaxSize = 1 << 16