inet.af/netstack@v0.0.0-20220214151720-7585b01ddccf/tcpip/stack/registration.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package stack 16 17 import ( 18 "fmt" 19 "time" 20 21 "inet.af/netstack/tcpip" 22 "inet.af/netstack/tcpip/buffer" 23 "inet.af/netstack/tcpip/header" 24 "inet.af/netstack/waiter" 25 ) 26 27 // NetworkEndpointID is the identifier of a network layer protocol endpoint. 28 // Currently the local address is sufficient because all supported protocols 29 // (i.e., IPv4 and IPv6) have different sizes for their addresses. 30 type NetworkEndpointID struct { 31 LocalAddress tcpip.Address 32 } 33 34 // TransportEndpointID is the identifier of a transport layer protocol endpoint. 35 // 36 // +stateify savable 37 type TransportEndpointID struct { 38 // LocalPort is the local port associated with the endpoint. 39 LocalPort uint16 40 41 // LocalAddress is the local [network layer] address associated with 42 // the endpoint. 43 LocalAddress tcpip.Address 44 45 // RemotePort is the remote port associated with the endpoint. 46 RemotePort uint16 47 48 // RemoteAddress it the remote [network layer] address associated with 49 // the endpoint. 50 RemoteAddress tcpip.Address 51 } 52 53 // NetworkPacketInfo holds information about a network layer packet. 54 type NetworkPacketInfo struct { 55 // LocalAddressBroadcast is true if the packet's local address is a broadcast 56 // address. 57 LocalAddressBroadcast bool 58 59 // IsForwardedPacket is true if the packet is being forwarded. 60 IsForwardedPacket bool 61 } 62 63 // TransportErrorKind enumerates error types that are handled by the transport 64 // layer. 65 type TransportErrorKind int 66 67 const ( 68 // PacketTooBigTransportError indicates that a packet did not reach its 69 // destination because a link on the path to the destination had an MTU that 70 // was too small to carry the packet. 71 PacketTooBigTransportError TransportErrorKind = iota 72 73 // DestinationHostUnreachableTransportError indicates that the destination 74 // host was unreachable. 75 DestinationHostUnreachableTransportError 76 77 // DestinationPortUnreachableTransportError indicates that a packet reached 78 // the destination host, but the transport protocol was not active on the 79 // destination port. 80 DestinationPortUnreachableTransportError 81 82 // DestinationNetworkUnreachableTransportError indicates that the destination 83 // network was unreachable. 84 DestinationNetworkUnreachableTransportError 85 ) 86 87 // TransportError is a marker interface for errors that may be handled by the 88 // transport layer. 89 type TransportError interface { 90 tcpip.SockErrorCause 91 92 // Kind returns the type of the transport error. 93 Kind() TransportErrorKind 94 } 95 96 // TransportEndpoint is the interface that needs to be implemented by transport 97 // protocol (e.g., tcp, udp) endpoints that can handle packets. 98 type TransportEndpoint interface { 99 // UniqueID returns an unique ID for this transport endpoint. 100 UniqueID() uint64 101 102 // HandlePacket is called by the stack when new packets arrive to this 103 // transport endpoint. It sets the packet buffer's transport header. 104 // 105 // HandlePacket may modify the packet. 106 HandlePacket(TransportEndpointID, *PacketBuffer) 107 108 // HandleError is called when the transport endpoint receives an error. 109 // 110 // HandleError takes may modify the packet buffer. 111 HandleError(TransportError, *PacketBuffer) 112 113 // Abort initiates an expedited endpoint teardown. It puts the endpoint 114 // in a closed state and frees all resources associated with it. This 115 // cleanup may happen asynchronously. Wait can be used to block on this 116 // asynchronous cleanup. 117 Abort() 118 119 // Wait waits for any worker goroutines owned by the endpoint to stop. 120 // 121 // An endpoint can be requested to stop its worker goroutines by calling 122 // its Close method. 123 // 124 // Wait will not block if the endpoint hasn't started any goroutines 125 // yet, even if it might later. 126 Wait() 127 } 128 129 // RawTransportEndpoint is the interface that needs to be implemented by raw 130 // transport protocol endpoints. RawTransportEndpoints receive the entire 131 // packet - including the network and transport headers - as delivered to 132 // netstack. 133 type RawTransportEndpoint interface { 134 // HandlePacket is called by the stack when new packets arrive to 135 // this transport endpoint. The packet contains all data from the link 136 // layer up. 137 // 138 // HandlePacket may modify the packet. 139 HandlePacket(*PacketBuffer) 140 } 141 142 // PacketEndpoint is the interface that needs to be implemented by packet 143 // transport protocol endpoints. These endpoints receive link layer headers in 144 // addition to whatever they contain (usually network and transport layer 145 // headers and a payload). 146 type PacketEndpoint interface { 147 // HandlePacket is called by the stack when new packets arrive that 148 // match the endpoint. 149 // 150 // Implementers should treat packet as immutable and should copy it 151 // before before modification. 152 // 153 // linkHeader may have a length of 0, in which case the PacketEndpoint 154 // should construct its own ethernet header for applications. 155 // 156 // HandlePacket may modify pkt. 157 HandlePacket(nicID tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer) 158 } 159 160 // UnknownDestinationPacketDisposition enumerates the possible return values from 161 // HandleUnknownDestinationPacket(). 162 type UnknownDestinationPacketDisposition int 163 164 const ( 165 // UnknownDestinationPacketMalformed denotes that the packet was malformed 166 // and no further processing should be attempted other than updating 167 // statistics. 168 UnknownDestinationPacketMalformed UnknownDestinationPacketDisposition = iota 169 170 // UnknownDestinationPacketUnhandled tells the caller that the packet was 171 // well formed but that the issue was not handled and the stack should take 172 // the default action. 173 UnknownDestinationPacketUnhandled 174 175 // UnknownDestinationPacketHandled tells the caller that it should do 176 // no further processing. 177 UnknownDestinationPacketHandled 178 ) 179 180 // TransportProtocol is the interface that needs to be implemented by transport 181 // protocols (e.g., tcp, udp) that want to be part of the networking stack. 182 type TransportProtocol interface { 183 // Number returns the transport protocol number. 184 Number() tcpip.TransportProtocolNumber 185 186 // NewEndpoint creates a new endpoint of the transport protocol. 187 NewEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) 188 189 // NewRawEndpoint creates a new raw endpoint of the transport protocol. 190 NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) 191 192 // MinimumPacketSize returns the minimum valid packet size of this 193 // transport protocol. The stack automatically drops any packets smaller 194 // than this targeted at this protocol. 195 MinimumPacketSize() int 196 197 // ParsePorts returns the source and destination ports stored in a 198 // packet of this protocol. 199 ParsePorts(v buffer.View) (src, dst uint16, err tcpip.Error) 200 201 // HandleUnknownDestinationPacket handles packets targeted at this 202 // protocol that don't match any existing endpoint. For example, 203 // it is targeted at a port that has no listeners. 204 // 205 // HandleUnknownDestinationPacket may modify the packet if it handles 206 // the issue. 207 HandleUnknownDestinationPacket(TransportEndpointID, *PacketBuffer) UnknownDestinationPacketDisposition 208 209 // SetOption allows enabling/disabling protocol specific features. 210 // SetOption returns an error if the option is not supported or the 211 // provided option value is invalid. 212 SetOption(option tcpip.SettableTransportProtocolOption) tcpip.Error 213 214 // Option allows retrieving protocol specific option values. 215 // Option returns an error if the option is not supported or the 216 // provided option value is invalid. 217 Option(option tcpip.GettableTransportProtocolOption) tcpip.Error 218 219 // Close requests that any worker goroutines owned by the protocol 220 // stop. 221 Close() 222 223 // Wait waits for any worker goroutines owned by the protocol to stop. 224 Wait() 225 226 // Parse sets pkt.TransportHeader and trims pkt.Data appropriately. It does 227 // neither and returns false if pkt.Data is too small, i.e. pkt.Data.Size() < 228 // MinimumPacketSize() 229 Parse(pkt *PacketBuffer) (ok bool) 230 } 231 232 // TransportPacketDisposition is the result from attempting to deliver a packet 233 // to the transport layer. 234 type TransportPacketDisposition int 235 236 const ( 237 // TransportPacketHandled indicates that a transport packet was handled by the 238 // transport layer and callers need not take any further action. 239 TransportPacketHandled TransportPacketDisposition = iota 240 241 // TransportPacketProtocolUnreachable indicates that the transport 242 // protocol requested in the packet is not supported. 243 TransportPacketProtocolUnreachable 244 245 // TransportPacketDestinationPortUnreachable indicates that there weren't any 246 // listeners interested in the packet and the transport protocol has no means 247 // to notify the sender. 248 TransportPacketDestinationPortUnreachable 249 ) 250 251 // TransportDispatcher contains the methods used by the network stack to deliver 252 // packets to the appropriate transport endpoint after it has been handled by 253 // the network layer. 254 type TransportDispatcher interface { 255 // DeliverTransportPacket delivers packets to the appropriate 256 // transport protocol endpoint. 257 // 258 // pkt.NetworkHeader must be set before calling DeliverTransportPacket. 259 // 260 // DeliverTransportPacket may modify the packet. 261 DeliverTransportPacket(tcpip.TransportProtocolNumber, *PacketBuffer) TransportPacketDisposition 262 263 // DeliverTransportError delivers an error to the appropriate transport 264 // endpoint. 265 // 266 // DeliverTransportError may modify the packet buffer. 267 DeliverTransportError(local, remote tcpip.Address, _ tcpip.NetworkProtocolNumber, _ tcpip.TransportProtocolNumber, _ TransportError, _ *PacketBuffer) 268 269 // DeliverRawPacket delivers a packet to any subscribed raw sockets. 270 // 271 // DeliverRawPacket does NOT take ownership of the packet buffer. 272 DeliverRawPacket(tcpip.TransportProtocolNumber, *PacketBuffer) 273 } 274 275 // PacketLooping specifies where an outbound packet should be sent. 276 type PacketLooping byte 277 278 const ( 279 // PacketOut indicates that the packet should be passed to the link 280 // endpoint. 281 PacketOut PacketLooping = 1 << iota 282 283 // PacketLoop indicates that the packet should be handled locally. 284 PacketLoop 285 ) 286 287 // NetworkHeaderParams are the header parameters given as input by the 288 // transport endpoint to the network. 289 type NetworkHeaderParams struct { 290 // Protocol refers to the transport protocol number. 291 Protocol tcpip.TransportProtocolNumber 292 293 // TTL refers to Time To Live field of the IP-header. 294 TTL uint8 295 296 // TOS refers to TypeOfService or TrafficClass field of the IP-header. 297 TOS uint8 298 } 299 300 // GroupAddressableEndpoint is an endpoint that supports group addressing. 301 // 302 // An endpoint is considered to support group addressing when one or more 303 // endpoints may associate themselves with the same identifier (group address). 304 type GroupAddressableEndpoint interface { 305 // JoinGroup joins the specified group. 306 JoinGroup(group tcpip.Address) tcpip.Error 307 308 // LeaveGroup attempts to leave the specified group. 309 LeaveGroup(group tcpip.Address) tcpip.Error 310 311 // IsInGroup returns true if the endpoint is a member of the specified group. 312 IsInGroup(group tcpip.Address) bool 313 } 314 315 // PrimaryEndpointBehavior is an enumeration of an AddressEndpoint's primary 316 // behavior. 317 type PrimaryEndpointBehavior int 318 319 const ( 320 // CanBePrimaryEndpoint indicates the endpoint can be used as a primary 321 // endpoint for new connections with no local address. 322 CanBePrimaryEndpoint PrimaryEndpointBehavior = iota 323 324 // FirstPrimaryEndpoint indicates the endpoint should be the first 325 // primary endpoint considered. If there are multiple endpoints with 326 // this behavior, they are ordered by recency. 327 FirstPrimaryEndpoint 328 329 // NeverPrimaryEndpoint indicates the endpoint should never be a 330 // primary endpoint. 331 NeverPrimaryEndpoint 332 ) 333 334 func (peb PrimaryEndpointBehavior) String() string { 335 switch peb { 336 case CanBePrimaryEndpoint: 337 return "CanBePrimaryEndpoint" 338 case FirstPrimaryEndpoint: 339 return "FirstPrimaryEndpoint" 340 case NeverPrimaryEndpoint: 341 return "NeverPrimaryEndpoint" 342 default: 343 panic(fmt.Sprintf("unknown primary endpoint behavior: %d", peb)) 344 } 345 } 346 347 // AddressConfigType is the method used to add an address. 348 type AddressConfigType int 349 350 const ( 351 // AddressConfigStatic is a statically configured address endpoint that was 352 // added by some user-specified action (adding an explicit address, joining a 353 // multicast group). 354 AddressConfigStatic AddressConfigType = iota 355 356 // AddressConfigSlaac is an address endpoint added by SLAAC, as per RFC 4862 357 // section 5.5.3. 358 AddressConfigSlaac 359 360 // AddressConfigSlaacTemp is a temporary address endpoint added by SLAAC as 361 // per RFC 4941. Temporary SLAAC addresses are short-lived and are not 362 // to be valid (or preferred) forever; hence the term temporary. 363 AddressConfigSlaacTemp 364 ) 365 366 // AddressProperties contains additional properties that can be configured when 367 // adding an address. 368 type AddressProperties struct { 369 PEB PrimaryEndpointBehavior 370 ConfigType AddressConfigType 371 Deprecated bool 372 } 373 374 // AssignableAddressEndpoint is a reference counted address endpoint that may be 375 // assigned to a NetworkEndpoint. 376 type AssignableAddressEndpoint interface { 377 // AddressWithPrefix returns the endpoint's address. 378 AddressWithPrefix() tcpip.AddressWithPrefix 379 380 // Subnet returns the subnet of the endpoint's address. 381 Subnet() tcpip.Subnet 382 383 // IsAssigned returns whether or not the endpoint is considered bound 384 // to its NetworkEndpoint. 385 IsAssigned(allowExpired bool) bool 386 387 // IncRef increments this endpoint's reference count. 388 // 389 // Returns true if it was successfully incremented. If it returns false, then 390 // the endpoint is considered expired and should no longer be used. 391 IncRef() bool 392 393 // DecRef decrements this endpoint's reference count. 394 DecRef() 395 } 396 397 // AddressEndpoint is an endpoint representing an address assigned to an 398 // AddressableEndpoint. 399 type AddressEndpoint interface { 400 AssignableAddressEndpoint 401 402 // GetKind returns the address kind for this endpoint. 403 GetKind() AddressKind 404 405 // SetKind sets the address kind for this endpoint. 406 SetKind(AddressKind) 407 408 // ConfigType returns the method used to add the address. 409 ConfigType() AddressConfigType 410 411 // Deprecated returns whether or not this endpoint is deprecated. 412 Deprecated() bool 413 414 // SetDeprecated sets this endpoint's deprecated status. 415 SetDeprecated(bool) 416 } 417 418 // AddressKind is the kind of an address. 419 // 420 // See the values of AddressKind for more details. 421 type AddressKind int 422 423 const ( 424 // PermanentTentative is a permanent address endpoint that is not yet 425 // considered to be fully bound to an interface in the traditional 426 // sense. That is, the address is associated with a NIC, but packets 427 // destined to the address MUST NOT be accepted and MUST be silently 428 // dropped, and the address MUST NOT be used as a source address for 429 // outgoing packets. For IPv6, addresses are of this kind until NDP's 430 // Duplicate Address Detection (DAD) resolves. If DAD fails, the address 431 // is removed. 432 PermanentTentative AddressKind = iota 433 434 // Permanent is a permanent endpoint (vs. a temporary one) assigned to the 435 // NIC. Its reference count is biased by 1 to avoid removal when no route 436 // holds a reference to it. It is removed by explicitly removing the address 437 // from the NIC. 438 Permanent 439 440 // PermanentExpired is a permanent endpoint that had its address removed from 441 // the NIC, and it is waiting to be removed once no references to it are held. 442 // 443 // If the address is re-added before the endpoint is removed, its type 444 // changes back to Permanent. 445 PermanentExpired 446 447 // Temporary is an endpoint, created on a one-off basis to temporarily 448 // consider the NIC bound an an address that it is not explicitly bound to 449 // (such as a permanent address). Its reference count must not be biased by 1 450 // so that the address is removed immediately when references to it are no 451 // longer held. 452 // 453 // A temporary endpoint may be promoted to permanent if the address is added 454 // permanently. 455 Temporary 456 ) 457 458 // IsPermanent returns true if the AddressKind represents a permanent address. 459 func (k AddressKind) IsPermanent() bool { 460 switch k { 461 case Permanent, PermanentTentative: 462 return true 463 case Temporary, PermanentExpired: 464 return false 465 default: 466 panic(fmt.Sprintf("unrecognized address kind = %d", k)) 467 } 468 } 469 470 // AddressableEndpoint is an endpoint that supports addressing. 471 // 472 // An endpoint is considered to support addressing when the endpoint may 473 // associate itself with an identifier (address). 474 type AddressableEndpoint interface { 475 // AddAndAcquirePermanentAddress adds the passed permanent address. 476 // 477 // Returns *tcpip.ErrDuplicateAddress if the address exists. 478 // 479 // Acquires and returns the AddressEndpoint for the added address. 480 AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, properties AddressProperties) (AddressEndpoint, tcpip.Error) 481 482 // RemovePermanentAddress removes the passed address if it is a permanent 483 // address. 484 // 485 // Returns *tcpip.ErrBadLocalAddress if the endpoint does not have the passed 486 // permanent address. 487 RemovePermanentAddress(addr tcpip.Address) tcpip.Error 488 489 // MainAddress returns the endpoint's primary permanent address. 490 MainAddress() tcpip.AddressWithPrefix 491 492 // AcquireAssignedAddress returns an address endpoint for the passed address 493 // that is considered bound to the endpoint, optionally creating a temporary 494 // endpoint if requested and no existing address exists. 495 // 496 // The returned endpoint's reference count is incremented. 497 // 498 // Returns nil if the specified address is not local to this endpoint. 499 AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB PrimaryEndpointBehavior) AddressEndpoint 500 501 // AcquireOutgoingPrimaryAddress returns a primary address that may be used as 502 // a source address when sending packets to the passed remote address. 503 // 504 // If allowExpired is true, expired addresses may be returned. 505 // 506 // The returned endpoint's reference count is incremented. 507 // 508 // Returns nil if a primary address is not available. 509 AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) AddressEndpoint 510 511 // PrimaryAddresses returns the primary addresses. 512 PrimaryAddresses() []tcpip.AddressWithPrefix 513 514 // PermanentAddresses returns all the permanent addresses. 515 PermanentAddresses() []tcpip.AddressWithPrefix 516 } 517 518 // NDPEndpoint is a network endpoint that supports NDP. 519 type NDPEndpoint interface { 520 NetworkEndpoint 521 522 // InvalidateDefaultRouter invalidates a default router discovered through 523 // NDP. 524 InvalidateDefaultRouter(tcpip.Address) 525 } 526 527 // NetworkInterface is a network interface. 528 type NetworkInterface interface { 529 NetworkLinkEndpoint 530 531 // ID returns the interface's ID. 532 ID() tcpip.NICID 533 534 // IsLoopback returns true if the interface is a loopback interface. 535 IsLoopback() bool 536 537 // Name returns the name of the interface. 538 // 539 // May return an empty string if the interface is not configured with a name. 540 Name() string 541 542 // Enabled returns true if the interface is enabled. 543 Enabled() bool 544 545 // Promiscuous returns true if the interface is in promiscuous mode. 546 // 547 // When in promiscuous mode, the interface should accept all packets. 548 Promiscuous() bool 549 550 // Spoofing returns true if the interface is in spoofing mode. 551 // 552 // When in spoofing mode, the interface should consider all addresses as 553 // assigned to it. 554 Spoofing() bool 555 556 // PrimaryAddress returns the primary address associated with the interface. 557 // 558 // PrimaryAddress will return the first non-deprecated address if such an 559 // address exists. If no non-deprecated addresses exist, the first deprecated 560 // address will be returned. If no deprecated addresses exist, the zero value 561 // will be returned. 562 PrimaryAddress(tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error) 563 564 // CheckLocalAddress returns true if the address exists on the interface. 565 CheckLocalAddress(tcpip.NetworkProtocolNumber, tcpip.Address) bool 566 567 // WritePacketToRemote writes the packet to the given remote link address. 568 WritePacketToRemote(tcpip.LinkAddress, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error 569 570 // WritePacket writes a packet with the given protocol through the given 571 // route. 572 // 573 // WritePacket may modify the packet buffer. The packet buffer's 574 // network and transport header must be set. 575 WritePacket(*Route, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error 576 577 // WritePackets writes packets with the given protocol through the given 578 // route. Must not be called with an empty list of packet buffers. 579 // 580 // WritePackets may modify the packet buffers. 581 // 582 // Right now, WritePackets is used only when the software segmentation 583 // offload is enabled. If it will be used for something else, syscall filters 584 // may need to be updated. 585 WritePackets(*Route, PacketBufferList, tcpip.NetworkProtocolNumber) (int, tcpip.Error) 586 587 // HandleNeighborProbe processes an incoming neighbor probe (e.g. ARP 588 // request or NDP Neighbor Solicitation). 589 // 590 // HandleNeighborProbe assumes that the probe is valid for the network 591 // interface the probe was received on. 592 HandleNeighborProbe(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress) tcpip.Error 593 594 // HandleNeighborConfirmation processes an incoming neighbor confirmation 595 // (e.g. ARP reply or NDP Neighbor Advertisement). 596 HandleNeighborConfirmation(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress, ReachabilityConfirmationFlags) tcpip.Error 597 } 598 599 // LinkResolvableNetworkEndpoint handles link resolution events. 600 type LinkResolvableNetworkEndpoint interface { 601 // HandleLinkResolutionFailure is called when link resolution prevents the 602 // argument from having been sent. 603 HandleLinkResolutionFailure(*PacketBuffer) 604 } 605 606 // NetworkEndpoint is the interface that needs to be implemented by endpoints 607 // of network layer protocols (e.g., ipv4, ipv6). 608 type NetworkEndpoint interface { 609 // Enable enables the endpoint. 610 // 611 // Must only be called when the stack is in a state that allows the endpoint 612 // to send and receive packets. 613 // 614 // Returns *tcpip.ErrNotPermitted if the endpoint cannot be enabled. 615 Enable() tcpip.Error 616 617 // Enabled returns true if the endpoint is enabled. 618 Enabled() bool 619 620 // Disable disables the endpoint. 621 Disable() 622 623 // DefaultTTL is the default time-to-live value (or hop limit, in ipv6) 624 // for this endpoint. 625 DefaultTTL() uint8 626 627 // MTU is the maximum transmission unit for this endpoint. This is 628 // generally calculated as the MTU of the underlying data link endpoint 629 // minus the network endpoint max header length. 630 MTU() uint32 631 632 // MaxHeaderLength returns the maximum size the network (and lower 633 // level layers combined) headers can have. Higher levels use this 634 // information to reserve space in the front of the packets they're 635 // building. 636 MaxHeaderLength() uint16 637 638 // WritePacket writes a packet to the given destination address and 639 // protocol. It may modify pkt. pkt.TransportHeader must have 640 // already been set. 641 WritePacket(r *Route, params NetworkHeaderParams, pkt *PacketBuffer) tcpip.Error 642 643 // WritePackets writes packets to the given destination address and 644 // protocol. pkts must not be zero length. It may modify pkts and 645 // underlying packets. 646 WritePackets(r *Route, pkts PacketBufferList, params NetworkHeaderParams) (int, tcpip.Error) 647 648 // WriteHeaderIncludedPacket writes a packet that includes a network 649 // header to the given destination address. It may modify pkt. 650 WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) tcpip.Error 651 652 // HandlePacket is called by the link layer when new packets arrive to 653 // this network endpoint. It sets pkt.NetworkHeader. 654 // 655 // HandlePacket may modify pkt. 656 HandlePacket(pkt *PacketBuffer) 657 658 // Close is called when the endpoint is removed from a stack. 659 Close() 660 661 // NetworkProtocolNumber returns the tcpip.NetworkProtocolNumber for 662 // this endpoint. 663 NetworkProtocolNumber() tcpip.NetworkProtocolNumber 664 665 // Stats returns a reference to the network endpoint stats. 666 Stats() NetworkEndpointStats 667 } 668 669 // NetworkEndpointStats is the interface implemented by each network endpoint 670 // stats struct. 671 type NetworkEndpointStats interface { 672 // IsNetworkEndpointStats is an empty method to implement the 673 // NetworkEndpointStats marker interface. 674 IsNetworkEndpointStats() 675 } 676 677 // IPNetworkEndpointStats is a NetworkEndpointStats that tracks IP-related 678 // statistics. 679 type IPNetworkEndpointStats interface { 680 NetworkEndpointStats 681 682 // IPStats returns the IP statistics of a network endpoint. 683 IPStats() *tcpip.IPStats 684 } 685 686 // ForwardingNetworkEndpoint is a network endpoint that may forward packets. 687 type ForwardingNetworkEndpoint interface { 688 NetworkEndpoint 689 690 // Forwarding returns the forwarding configuration. 691 Forwarding() bool 692 693 // SetForwarding sets the forwarding configuration. 694 SetForwarding(bool) 695 } 696 697 // NetworkProtocol is the interface that needs to be implemented by network 698 // protocols (e.g., ipv4, ipv6) that want to be part of the networking stack. 699 type NetworkProtocol interface { 700 // Number returns the network protocol number. 701 Number() tcpip.NetworkProtocolNumber 702 703 // MinimumPacketSize returns the minimum valid packet size of this 704 // network protocol. The stack automatically drops any packets smaller 705 // than this targeted at this protocol. 706 MinimumPacketSize() int 707 708 // ParseAddresses returns the source and destination addresses stored in a 709 // packet of this protocol. 710 ParseAddresses(v buffer.View) (src, dst tcpip.Address) 711 712 // NewEndpoint creates a new endpoint of this protocol. 713 NewEndpoint(nic NetworkInterface, dispatcher TransportDispatcher) NetworkEndpoint 714 715 // SetOption allows enabling/disabling protocol specific features. 716 // SetOption returns an error if the option is not supported or the 717 // provided option value is invalid. 718 SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error 719 720 // Option allows retrieving protocol specific option values. 721 // Option returns an error if the option is not supported or the 722 // provided option value is invalid. 723 Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error 724 725 // Close requests that any worker goroutines owned by the protocol 726 // stop. 727 Close() 728 729 // Wait waits for any worker goroutines owned by the protocol to stop. 730 Wait() 731 732 // Parse sets pkt.NetworkHeader and trims pkt.Data appropriately. It 733 // returns: 734 // - The encapsulated protocol, if present. 735 // - Whether there is an encapsulated transport protocol payload (e.g. ARP 736 // does not encapsulate anything). 737 // - Whether pkt.Data was large enough to parse and set pkt.NetworkHeader. 738 Parse(pkt *PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) 739 } 740 741 // NetworkDispatcher contains the methods used by the network stack to deliver 742 // inbound/outbound packets to the appropriate network/packet(if any) endpoints. 743 type NetworkDispatcher interface { 744 // DeliverNetworkPacket finds the appropriate network protocol endpoint 745 // and hands the packet over for further processing. 746 // 747 // pkt.LinkHeader may or may not be set before calling 748 // DeliverNetworkPacket. Some packets do not have link headers (e.g. 749 // packets sent via loopback), and won't have the field set. 750 // 751 // DeliverNetworkPacket may modify pkt. 752 DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) 753 } 754 755 // LinkEndpointCapabilities is the type associated with the capabilities 756 // supported by a link-layer endpoint. It is a set of bitfields. 757 type LinkEndpointCapabilities uint 758 759 // The following are the supported link endpoint capabilities. 760 const ( 761 CapabilityNone LinkEndpointCapabilities = 0 762 // CapabilityTXChecksumOffload indicates that the link endpoint supports 763 // checksum computation for outgoing packets and the stack can skip 764 // computing checksums when sending packets. 765 CapabilityTXChecksumOffload LinkEndpointCapabilities = 1 << iota 766 // CapabilityRXChecksumOffload indicates that the link endpoint supports 767 // checksum verification on received packets and that it's safe for the 768 // stack to skip checksum verification. 769 CapabilityRXChecksumOffload 770 CapabilityResolutionRequired 771 CapabilitySaveRestore 772 CapabilityDisconnectOk 773 CapabilityLoopback 774 ) 775 776 // NetworkLinkEndpoint is a data-link layer that supports sending network 777 // layer packets. 778 type NetworkLinkEndpoint interface { 779 // MTU is the maximum transmission unit for this endpoint. This is 780 // usually dictated by the backing physical network; when such a 781 // physical network doesn't exist, the limit is generally 64k, which 782 // includes the maximum size of an IP packet. 783 MTU() uint32 784 785 // MaxHeaderLength returns the maximum size the data link (and 786 // lower level layers combined) headers can have. Higher levels use this 787 // information to reserve space in the front of the packets they're 788 // building. 789 MaxHeaderLength() uint16 790 791 // LinkAddress returns the link address (typically a MAC) of the 792 // endpoint. 793 LinkAddress() tcpip.LinkAddress 794 } 795 796 // LinkEndpoint is the interface implemented by data link layer protocols (e.g., 797 // ethernet, loopback, raw) and used by network layer protocols to send packets 798 // out through the implementer's data link endpoint. When a link header exists, 799 // it sets each PacketBuffer's LinkHeader field before passing it up the 800 // stack. 801 type LinkEndpoint interface { 802 NetworkLinkEndpoint 803 804 // Capabilities returns the set of capabilities supported by the 805 // endpoint. 806 Capabilities() LinkEndpointCapabilities 807 808 // Attach attaches the data link layer endpoint to the network-layer 809 // dispatcher of the stack. 810 // 811 // Attach is called with a nil dispatcher when the endpoint's NIC is being 812 // removed. 813 Attach(dispatcher NetworkDispatcher) 814 815 // IsAttached returns whether a NetworkDispatcher is attached to the 816 // endpoint. 817 IsAttached() bool 818 819 // Wait waits for any worker goroutines owned by the endpoint to stop. 820 // 821 // For now, requesting that an endpoint's worker goroutine(s) stop is 822 // implementation specific. 823 // 824 // Wait will not block if the endpoint hasn't started any goroutines 825 // yet, even if it might later. 826 Wait() 827 828 // ARPHardwareType returns the ARPHRD_TYPE of the link endpoint. 829 // 830 // See: 831 // https://github.com/torvalds/linux/blob/aa0c9086b40c17a7ad94425b3b70dd1fdd7497bf/include/uapi/linux/if_arp.h#L30 832 ARPHardwareType() header.ARPHardwareType 833 834 // AddHeader adds a link layer header to pkt if required. 835 AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) 836 837 // WritePacket writes a packet with the given protocol and route. 838 // 839 // WritePacket may modify the packet buffer. The packet buffer's 840 // network and transport header must be set. 841 // 842 // To participate in transparent bridging, a LinkEndpoint implementation 843 // should call eth.Encode with header.EthernetFields.SrcAddr set to 844 // r.LocalLinkAddress if it is provided. 845 WritePacket(RouteInfo, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error 846 847 // WritePackets writes packets with the given protocol and route. Must not be 848 // called with an empty list of packet buffers. 849 // 850 // WritePackets may modify the packet buffers. 851 // 852 // Right now, WritePackets is used only when the software segmentation 853 // offload is enabled. If it will be used for something else, syscall filters 854 // may need to be updated. 855 WritePackets(RouteInfo, PacketBufferList, tcpip.NetworkProtocolNumber) (int, tcpip.Error) 856 857 // WriteRawPacket writes a packet directly to the link. 858 // 859 // If the link-layer has its own header, the payload must already include the 860 // header. 861 // 862 // WriteRawPacket may modify the packet. 863 WriteRawPacket(*PacketBuffer) tcpip.Error 864 } 865 866 // InjectableLinkEndpoint is a LinkEndpoint where inbound packets are 867 // delivered via the Inject method. 868 type InjectableLinkEndpoint interface { 869 LinkEndpoint 870 871 // InjectInbound injects an inbound packet. 872 InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) 873 874 // InjectOutbound writes a fully formed outbound packet directly to the 875 // link. 876 // 877 // dest is used by endpoints with multiple raw destinations. 878 InjectOutbound(dest tcpip.Address, packet []byte) tcpip.Error 879 } 880 881 // DADResult is a marker interface for the result of a duplicate address 882 // detection process. 883 type DADResult interface { 884 isDADResult() 885 } 886 887 var _ DADResult = (*DADSucceeded)(nil) 888 889 // DADSucceeded indicates DAD completed without finding any duplicate addresses. 890 type DADSucceeded struct{} 891 892 func (*DADSucceeded) isDADResult() {} 893 894 var _ DADResult = (*DADError)(nil) 895 896 // DADError indicates DAD hit an error. 897 type DADError struct { 898 Err tcpip.Error 899 } 900 901 func (*DADError) isDADResult() {} 902 903 var _ DADResult = (*DADAborted)(nil) 904 905 // DADAborted indicates DAD was aborted. 906 type DADAborted struct{} 907 908 func (*DADAborted) isDADResult() {} 909 910 var _ DADResult = (*DADDupAddrDetected)(nil) 911 912 // DADDupAddrDetected indicates DAD detected a duplicate address. 913 type DADDupAddrDetected struct { 914 // HolderLinkAddress is the link address of the node that holds the duplicate 915 // address. 916 HolderLinkAddress tcpip.LinkAddress 917 } 918 919 func (*DADDupAddrDetected) isDADResult() {} 920 921 // DADCompletionHandler is a handler for DAD completion. 922 type DADCompletionHandler func(DADResult) 923 924 // DADCheckAddressDisposition enumerates the possible return values from 925 // DAD.CheckDuplicateAddress. 926 type DADCheckAddressDisposition int 927 928 const ( 929 _ DADCheckAddressDisposition = iota 930 931 // DADDisabled indicates that DAD is disabled. 932 DADDisabled 933 934 // DADStarting indicates that DAD is starting for an address. 935 DADStarting 936 937 // DADAlreadyRunning indicates that DAD was already started for an address. 938 DADAlreadyRunning 939 ) 940 941 const ( 942 // defaultDupAddrDetectTransmits is the default number of NDP Neighbor 943 // Solicitation messages to send when doing Duplicate Address Detection 944 // for a tentative address. 945 // 946 // Default = 1 (from RFC 4862 section 5.1) 947 defaultDupAddrDetectTransmits = 1 948 ) 949 950 // DADConfigurations holds configurations for duplicate address detection. 951 type DADConfigurations struct { 952 // The number of Neighbor Solicitation messages to send when doing 953 // Duplicate Address Detection for a tentative address. 954 // 955 // Note, a value of zero effectively disables DAD. 956 DupAddrDetectTransmits uint8 957 958 // The amount of time to wait between sending Neighbor Solicitation 959 // messages. 960 // 961 // Must be greater than or equal to 1ms. 962 RetransmitTimer time.Duration 963 } 964 965 // DefaultDADConfigurations returns the default DAD configurations. 966 func DefaultDADConfigurations() DADConfigurations { 967 return DADConfigurations{ 968 DupAddrDetectTransmits: defaultDupAddrDetectTransmits, 969 RetransmitTimer: defaultRetransmitTimer, 970 } 971 } 972 973 // Validate modifies the configuration with valid values. If invalid values are 974 // present in the configurations, the corresponding default values are used 975 // instead. 976 func (c *DADConfigurations) Validate() { 977 if c.RetransmitTimer < minimumRetransmitTimer { 978 c.RetransmitTimer = defaultRetransmitTimer 979 } 980 } 981 982 // DuplicateAddressDetector handles checking if an address is already assigned 983 // to some neighboring node on the link. 984 type DuplicateAddressDetector interface { 985 // CheckDuplicateAddress checks if an address is assigned to a neighbor. 986 // 987 // If DAD is already being performed for the address, the handler will be 988 // called with the result of the original DAD request. 989 CheckDuplicateAddress(tcpip.Address, DADCompletionHandler) DADCheckAddressDisposition 990 991 // SetDADConfigurations sets the configurations for DAD. 992 SetDADConfigurations(c DADConfigurations) 993 994 // DuplicateAddressProtocol returns the network protocol the receiver can 995 // perform duplicate address detection for. 996 DuplicateAddressProtocol() tcpip.NetworkProtocolNumber 997 } 998 999 // LinkAddressResolver handles link address resolution for a network protocol. 1000 type LinkAddressResolver interface { 1001 // LinkAddressRequest sends a request for the link address of the target 1002 // address. The request is broadcast on the local network if a remote link 1003 // address is not provided. 1004 LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) tcpip.Error 1005 1006 // ResolveStaticAddress attempts to resolve address without sending 1007 // requests. It either resolves the name immediately or returns the 1008 // empty LinkAddress. 1009 // 1010 // It can be used to resolve broadcast addresses for example. 1011 ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) 1012 1013 // LinkAddressProtocol returns the network protocol of the 1014 // addresses this resolver can resolve. 1015 LinkAddressProtocol() tcpip.NetworkProtocolNumber 1016 } 1017 1018 // RawFactory produces endpoints for writing various types of raw packets. 1019 type RawFactory interface { 1020 // NewUnassociatedEndpoint produces endpoints for writing packets not 1021 // associated with a particular transport protocol. Such endpoints can 1022 // be used to write arbitrary packets that include the network header. 1023 NewUnassociatedEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) 1024 1025 // NewPacketEndpoint produces endpoints for reading and writing packets 1026 // that include network and (when cooked is false) link layer headers. 1027 NewPacketEndpoint(stack *Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) 1028 } 1029 1030 // GSOType is the type of GSO segments. 1031 // 1032 // +stateify savable 1033 type GSOType int 1034 1035 // Types of gso segments. 1036 const ( 1037 GSONone GSOType = iota 1038 1039 // Hardware GSO types: 1040 GSOTCPv4 1041 GSOTCPv6 1042 1043 // GSOSW is used for software GSO segments which have to be sent by 1044 // endpoint.WritePackets. 1045 GSOSW 1046 ) 1047 1048 // GSO contains generic segmentation offload properties. 1049 // 1050 // +stateify savable 1051 type GSO struct { 1052 // Type is one of GSONone, GSOTCPv4, etc. 1053 Type GSOType 1054 // NeedsCsum is set if the checksum offload is enabled. 1055 NeedsCsum bool 1056 // CsumOffset is offset after that to place checksum. 1057 CsumOffset uint16 1058 1059 // Mss is maximum segment size. 1060 MSS uint16 1061 // L3Len is L3 (IP) header length. 1062 L3HdrLen uint16 1063 1064 // MaxSize is maximum GSO packet size. 1065 MaxSize uint32 1066 } 1067 1068 // SupportedGSO returns the type of segmentation offloading supported. 1069 type SupportedGSO int 1070 1071 const ( 1072 // GSONotSupported indicates that segmentation offloading is not supported. 1073 GSONotSupported SupportedGSO = iota 1074 1075 // HWGSOSupported indicates that segmentation offloading may be performed by 1076 // the hardware. 1077 HWGSOSupported 1078 1079 // SWGSOSupported indicates that segmentation offloading may be performed in 1080 // software. 1081 SWGSOSupported 1082 ) 1083 1084 // GSOEndpoint provides access to GSO properties. 1085 type GSOEndpoint interface { 1086 // GSOMaxSize returns the maximum GSO packet size. 1087 GSOMaxSize() uint32 1088 1089 // SupportedGSO returns the supported segmentation offloading. 1090 SupportedGSO() SupportedGSO 1091 } 1092 1093 // SoftwareGSOMaxSize is a maximum allowed size of a software GSO segment. 1094 // This isn't a hard limit, because it is never set into packet headers. 1095 const SoftwareGSOMaxSize = 1 << 16