github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/tcpip/tcpip.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package tcpip provides the interfaces and related types that users of the 16 // tcpip stack will use in order to create endpoints used to send and receive 17 // data over the network stack. 18 // 19 // The starting point is the creation and configuration of a stack. A stack can 20 // be created by calling the New() function of the tcpip/stack/stack package; 21 // configuring a stack involves creating NICs (via calls to Stack.CreateNIC()), 22 // adding network addresses (via calls to Stack.AddProtocolAddress()), and 23 // setting a route table (via a call to Stack.SetRouteTable()). 24 // 25 // Once a stack is configured, endpoints can be created by calling 26 // Stack.NewEndpoint(). Such endpoints can be used to send/receive data, connect 27 // to peers, listen for connections, accept connections, etc., depending on the 28 // transport protocol selected. 29 package tcpip 30 31 import ( 32 "bytes" 33 "errors" 34 "fmt" 35 "io" 36 "math" 37 "math/bits" 38 "reflect" 39 "strconv" 40 "strings" 41 "time" 42 "unsafe" 43 44 "github.com/metacubex/gvisor/pkg/atomicbitops" 45 "github.com/metacubex/gvisor/pkg/sync" 46 "github.com/metacubex/gvisor/pkg/waiter" 47 ) 48 49 // Using the header package here would cause an import cycle. 
const (
	ipv4AddressSize    = 4
	ipv4ProtocolNumber = 0x0800
	ipv6AddressSize    = 16
	ipv6ProtocolNumber = 0x86dd
)

// Errors related to Subnet.
var (
	errSubnetLengthMismatch = errors.New("subnet length of address and mask differ")
	errSubnetAddressMasked  = errors.New("subnet address has bits set outside the mask")
)

// ErrSaveRejection indicates a failed save due to unsupported networking state.
// This type of error is only used for save logic.
type ErrSaveRejection struct {
	// Err is the underlying reason the save was rejected.
	Err error
}

// Error returns a sensible description of the save rejection error.
func (e *ErrSaveRejection) Error() string {
	return "save rejected due to unsupported networking state: " + e.Err.Error()
}

// Unwrap returns the underlying error so that errors.Is and errors.As can
// inspect the cause of the rejection.
func (e *ErrSaveRejection) Unwrap() error {
	return e.Err
}

// MonotonicTime is a monotonic clock reading.
//
// +stateify savable
type MonotonicTime struct {
	nanoseconds int64
}

// String implements Stringer. The reading is rendered as its nanosecond count
// in base 10.
func (mt MonotonicTime) String() string {
	return strconv.FormatInt(mt.nanoseconds, 10)
}

// MonotonicTimeInfinite returns the monotonic timestamp as far away in the
// future as possible.
func MonotonicTimeInfinite() MonotonicTime {
	return MonotonicTime{nanoseconds: math.MaxInt64}
}

// Before reports whether the monotonic clock reading mt is before u.
func (mt MonotonicTime) Before(u MonotonicTime) bool {
	return mt.nanoseconds < u.nanoseconds
}

// After reports whether the monotonic clock reading mt is after u.
func (mt MonotonicTime) After(u MonotonicTime) bool {
	return mt.nanoseconds > u.nanoseconds
}

// Add returns the monotonic clock reading mt+d.
func (mt MonotonicTime) Add(d time.Duration) MonotonicTime {
	// The sum is computed with time.Time arithmetic relative to the Unix
	// epoch rather than with a raw int64 addition.
	return MonotonicTime{
		nanoseconds: time.Unix(0, mt.nanoseconds).Add(d).Sub(time.Unix(0, 0)).Nanoseconds(),
	}
}

// Sub returns the duration mt-u.
If the result exceeds the maximum (or minimum) 110 // value that can be stored in a Duration, the maximum (or minimum) duration 111 // will be returned. To compute t-d for a duration d, use t.Add(-d). 112 func (mt MonotonicTime) Sub(u MonotonicTime) time.Duration { 113 return time.Unix(0, mt.nanoseconds).Sub(time.Unix(0, u.nanoseconds)) 114 } 115 116 // Milliseconds returns the time in milliseconds. 117 func (mt MonotonicTime) Milliseconds() int64 { 118 return mt.nanoseconds / 1e6 119 } 120 121 // A Clock provides the current time and schedules work for execution. 122 // 123 // Times returned by a Clock should always be used for application-visible 124 // time. Only monotonic times should be used for netstack internal timekeeping. 125 type Clock interface { 126 // Now returns the current local time. 127 Now() time.Time 128 129 // NowMonotonic returns the current monotonic clock reading. 130 NowMonotonic() MonotonicTime 131 132 // AfterFunc waits for the duration to elapse and then calls f in its own 133 // goroutine. It returns a Timer that can be used to cancel the call using 134 // its Stop method. 135 AfterFunc(d time.Duration, f func()) Timer 136 } 137 138 // Timer represents a single event. A Timer must be created with 139 // Clock.AfterFunc. 140 type Timer interface { 141 // Stop prevents the Timer from firing. It returns true if the call stops the 142 // timer, false if the timer has already expired or been stopped. 143 // 144 // If Stop returns false, then the timer has already expired and the function 145 // f of Clock.AfterFunc(d, f) has been started in its own goroutine; Stop 146 // does not wait for f to complete before returning. If the caller needs to 147 // know whether f is completed, it must coordinate with f explicitly. 148 Stop() bool 149 150 // Reset changes the timer to expire after duration d. 151 // 152 // Reset should be invoked only on stopped or expired timers. If the timer is 153 // known to have expired, Reset can be used directly. 
Otherwise, the caller 154 // must coordinate with the function f of Clock.AfterFunc(d, f). 155 Reset(d time.Duration) 156 } 157 158 // Address is a byte slice cast as a string that represents the address of a 159 // network node. Or, in the case of unix endpoints, it may represent a path. 160 // 161 // +stateify savable 162 type Address struct { 163 addr [16]byte 164 length int 165 } 166 167 // AddrFrom4 converts addr to an Address. 168 func AddrFrom4(addr [4]byte) Address { 169 ret := Address{ 170 length: 4, 171 } 172 // It's guaranteed that copy will return 4. 173 copy(ret.addr[:], addr[:]) 174 return ret 175 } 176 177 // AddrFrom4Slice converts addr to an Address. It panics if len(addr) != 4. 178 func AddrFrom4Slice(addr []byte) Address { 179 if len(addr) != 4 { 180 panic(fmt.Sprintf("bad address length for address %v", addr)) 181 } 182 ret := Address{ 183 length: 4, 184 } 185 // It's guaranteed that copy will return 4. 186 copy(ret.addr[:], addr) 187 return ret 188 } 189 190 // AddrFrom16 converts addr to an Address. 191 func AddrFrom16(addr [16]byte) Address { 192 ret := Address{ 193 length: 16, 194 } 195 // It's guaranteed that copy will return 16. 196 copy(ret.addr[:], addr[:]) 197 return ret 198 } 199 200 // AddrFrom16Slice converts addr to an Address. It panics if len(addr) != 16. 201 func AddrFrom16Slice(addr []byte) Address { 202 if len(addr) != 16 { 203 panic(fmt.Sprintf("bad address length for address %v", addr)) 204 } 205 ret := Address{ 206 length: 16, 207 } 208 // It's guaranteed that copy will return 16. 209 copy(ret.addr[:], addr) 210 return ret 211 } 212 213 // AddrFromSlice converts addr to an Address. It returns the Address zero value 214 // if len(addr) != 4 or 16. 215 func AddrFromSlice(addr []byte) Address { 216 switch len(addr) { 217 case ipv4AddressSize: 218 return AddrFrom4Slice(addr) 219 case ipv6AddressSize: 220 return AddrFrom16Slice(addr) 221 } 222 return Address{} 223 } 224 225 // As4 returns a as a 4 byte array. 
It panics if the address length is not 4. 226 func (a Address) As4() [4]byte { 227 if a.Len() != 4 { 228 panic(fmt.Sprintf("bad address length for address %v", a.addr)) 229 } 230 return *(*[4]byte)((unsafe.Pointer)(&a.addr[0])) 231 } 232 233 // As16 returns a as a 16 byte array. It panics if the address length is not 16. 234 func (a Address) As16() [16]byte { 235 if a.Len() != 16 { 236 panic(fmt.Sprintf("bad address length for address %v", a.addr)) 237 } 238 return a.addr 239 } 240 241 // AsSlice returns a as a byte slice. Callers should be careful as it can 242 // return a window into existing memory. 243 // 244 // +checkescape 245 func (a *Address) AsSlice() []byte { 246 return a.addr[:a.length] 247 } 248 249 // BitLen returns the length in bits of a. 250 func (a Address) BitLen() int { 251 return a.Len() * 8 252 } 253 254 // Len returns the length in bytes of a. 255 func (a Address) Len() int { 256 return a.length 257 } 258 259 // WithPrefix returns the address with a prefix that represents a point subnet. 260 func (a Address) WithPrefix() AddressWithPrefix { 261 return AddressWithPrefix{ 262 Address: a, 263 PrefixLen: a.BitLen(), 264 } 265 } 266 267 // Unspecified returns true if the address is unspecified. 268 func (a Address) Unspecified() bool { 269 for _, b := range a.addr { 270 if b != 0 { 271 return false 272 } 273 } 274 return true 275 } 276 277 // Equal returns whether a and other are equal. It exists for use by the cmp 278 // library. 279 func (a Address) Equal(other Address) bool { 280 return a == other 281 } 282 283 // MatchingPrefix returns the matching prefix length in bits. 284 // 285 // Panics if b and a have different lengths. 
286 func (a Address) MatchingPrefix(b Address) uint8 { 287 const bitsInAByte = 8 288 289 if a.Len() != b.Len() { 290 panic(fmt.Sprintf("addresses %s and %s do not have the same length", a, b)) 291 } 292 293 var prefix uint8 294 for i := 0; i < a.length; i++ { 295 aByte := a.addr[i] 296 bByte := b.addr[i] 297 298 if aByte == bByte { 299 prefix += bitsInAByte 300 continue 301 } 302 303 // Count the remaining matching bits in the byte from MSbit to LSBbit. 304 mask := uint8(1) << (bitsInAByte - 1) 305 for { 306 if aByte&mask == bByte&mask { 307 prefix++ 308 mask >>= 1 309 continue 310 } 311 312 break 313 } 314 315 break 316 } 317 318 return prefix 319 } 320 321 // AddressMask is a bitmask for an address. 322 // 323 // +stateify savable 324 type AddressMask struct { 325 mask [16]byte 326 length int 327 } 328 329 // MaskFrom returns a Mask based on str. 330 // 331 // MaskFrom may allocate, and so should not be in hot paths. 332 func MaskFrom(str string) AddressMask { 333 mask := AddressMask{length: len(str)} 334 copy(mask.mask[:], str) 335 return mask 336 } 337 338 // MaskFromBytes returns a Mask based on bs. 339 func MaskFromBytes(bs []byte) AddressMask { 340 mask := AddressMask{length: len(bs)} 341 copy(mask.mask[:], bs) 342 return mask 343 } 344 345 // String implements Stringer. 346 func (m AddressMask) String() string { 347 return fmt.Sprintf("%x", m.mask) 348 } 349 350 // AsSlice returns a as a byte slice. Callers should be careful as it can 351 // return a window into existing memory. 352 func (m *AddressMask) AsSlice() []byte { 353 return []byte(m.mask[:m.length]) 354 } 355 356 // BitLen returns the length of the mask in bits. 357 func (m AddressMask) BitLen() int { 358 return m.length * 8 359 } 360 361 // Len returns the length of the mask in bytes. 362 func (m AddressMask) Len() int { 363 return m.length 364 } 365 366 // Prefix returns the number of bits before the first host bit. 
367 func (m AddressMask) Prefix() int { 368 p := 0 369 for _, b := range m.mask[:m.length] { 370 p += bits.LeadingZeros8(^b) 371 } 372 return p 373 } 374 375 // Equal returns whether m and other are equal. It exists for use by the cmp 376 // library. 377 func (m AddressMask) Equal(other AddressMask) bool { 378 return m == other 379 } 380 381 // Subnet is a subnet defined by its address and mask. 382 type Subnet struct { 383 address Address 384 mask AddressMask 385 } 386 387 // NewSubnet creates a new Subnet, checking that the address and mask are the same length. 388 func NewSubnet(a Address, m AddressMask) (Subnet, error) { 389 if a.Len() != m.Len() { 390 return Subnet{}, errSubnetLengthMismatch 391 } 392 for i := 0; i < a.Len(); i++ { 393 if a.addr[i]&^m.mask[i] != 0 { 394 return Subnet{}, errSubnetAddressMasked 395 } 396 } 397 return Subnet{a, m}, nil 398 } 399 400 // String implements Stringer. 401 func (s Subnet) String() string { 402 return fmt.Sprintf("%s/%d", s.ID(), s.Prefix()) 403 } 404 405 // Contains returns true iff the address is of the same length and matches the 406 // subnet address and mask. 407 func (s *Subnet) Contains(a Address) bool { 408 if a.Len() != s.address.Len() { 409 return false 410 } 411 for i := 0; i < a.Len(); i++ { 412 if a.addr[i]&s.mask.mask[i] != s.address.addr[i] { 413 return false 414 } 415 } 416 return true 417 } 418 419 // ID returns the subnet ID. 420 func (s *Subnet) ID() Address { 421 return s.address 422 } 423 424 // Bits returns the number of ones (network bits) and zeros (host bits) in the 425 // subnet mask. 426 func (s *Subnet) Bits() (ones int, zeros int) { 427 ones = s.mask.Prefix() 428 return ones, s.mask.BitLen() - ones 429 } 430 431 // Prefix returns the number of bits before the first host bit. 432 func (s *Subnet) Prefix() int { 433 return s.mask.Prefix() 434 } 435 436 // Mask returns the subnet mask. 
437 func (s *Subnet) Mask() AddressMask { 438 return s.mask 439 } 440 441 // Broadcast returns the subnet's broadcast address. 442 func (s *Subnet) Broadcast() Address { 443 addrCopy := s.address 444 for i := 0; i < addrCopy.Len(); i++ { 445 addrCopy.addr[i] |= ^s.mask.mask[i] 446 } 447 return addrCopy 448 } 449 450 // IsBroadcast returns true if the address is considered a broadcast address. 451 func (s *Subnet) IsBroadcast(address Address) bool { 452 // Only IPv4 supports the notion of a broadcast address. 453 if address.Len() != ipv4AddressSize { 454 return false 455 } 456 457 // Normally, we would just compare address with the subnet's broadcast 458 // address but there is an exception where a simple comparison is not 459 // correct. This exception is for /31 and /32 IPv4 subnets where all 460 // addresses are considered valid host addresses. 461 // 462 // For /31 subnets, the case is easy. RFC 3021 Section 2.1 states that 463 // both addresses in a /31 subnet "MUST be interpreted as host addresses." 464 // 465 // For /32, the case is a bit more vague. RFC 3021 makes no mention of /32 466 // subnets. However, the same reasoning applies - if an exception is not 467 // made, then there do not exist any host addresses in a /32 subnet. RFC 468 // 4632 Section 3.1 also vaguely implies this interpretation by referring 469 // to addresses in /32 subnets as "host routes." 470 return s.Prefix() <= 30 && s.Broadcast() == address 471 } 472 473 // Equal returns true if this Subnet is equal to the given Subnet. 474 func (s Subnet) Equal(o Subnet) bool { 475 // If this changes, update Route.Equal accordingly. 476 return s == o 477 } 478 479 // NICID is a number that uniquely identifies a NIC. 480 type NICID int32 481 482 // ShutdownFlags represents flags that can be passed to the Shutdown() method 483 // of the Endpoint interface. 484 type ShutdownFlags int 485 486 // Values of the flags that can be passed to the Shutdown() method. They can 487 // be OR'ed together. 
const (
	// ShutdownRead is the flag with value 1 (1 << 0).
	ShutdownRead ShutdownFlags = 1 << iota
	// ShutdownWrite is the flag with value 2 (1 << 1).
	ShutdownWrite
)

// PacketType is used to indicate the destination of the packet.
type PacketType uint8

const (
	// PacketHost indicates a packet addressed to the local host.
	PacketHost PacketType = iota

	// PacketOtherHost indicates an outgoing packet addressed to
	// another host caught by a NIC in promiscuous mode.
	PacketOtherHost

	// PacketOutgoing for a packet originating from the local host
	// that is looped back to a packet socket.
	PacketOutgoing

	// PacketBroadcast indicates a link layer broadcast packet.
	PacketBroadcast

	// PacketMulticast indicates a link layer multicast packet.
	PacketMulticast
)

// FullAddress represents a full transport node address, as required by the
// Connect() and Bind() methods.
//
// +stateify savable
type FullAddress struct {
	// NIC is the ID of the NIC this address refers to.
	//
	// This may not be used by all endpoint types.
	NIC NICID

	// Addr is the network address.
	Addr Address

	// Port is the transport port.
	//
	// This may not be used by all endpoint types.
	Port uint16

	// LinkAddr is the link layer address.
	LinkAddr LinkAddress
}

// Payloader is an interface that provides data.
//
// This interface allows the endpoint to request the amount of data it needs
// based on internal buffers without exposing them.
type Payloader interface {
	io.Reader

	// Len returns the number of bytes of the unread portion of the
	// Reader.
	Len() int
}

// Compile-time checks that the standard library buffer and reader types
// satisfy Payloader.
var _ Payloader = (*bytes.Buffer)(nil)
var _ Payloader = (*bytes.Reader)(nil)

// Compile-time check that *SliceWriter satisfies io.Writer.
var _ io.Writer = (*SliceWriter)(nil)

// SliceWriter implements io.Writer for slices.
type SliceWriter []byte

// Write implements io.Writer.Write.
558 func (s *SliceWriter) Write(b []byte) (int, error) { 559 n := copy(*s, b) 560 *s = (*s)[n:] 561 var err error 562 if n != len(b) { 563 err = io.ErrShortWrite 564 } 565 return n, err 566 } 567 568 var _ io.Writer = (*LimitedWriter)(nil) 569 570 // A LimitedWriter writes to W but limits the amount of data copied to just N 571 // bytes. Each call to Write updates N to reflect the new amount remaining. 572 type LimitedWriter struct { 573 W io.Writer 574 N int64 575 } 576 577 func (l *LimitedWriter) Write(p []byte) (int, error) { 578 pLen := int64(len(p)) 579 if pLen > l.N { 580 p = p[:l.N] 581 } 582 n, err := l.W.Write(p) 583 n64 := int64(n) 584 if err == nil && n64 != pLen { 585 err = io.ErrShortWrite 586 } 587 l.N -= n64 588 return n, err 589 } 590 591 // SendableControlMessages contains socket control messages that can be written. 592 // 593 // +stateify savable 594 type SendableControlMessages struct { 595 // HasTTL indicates whether TTL is valid/set. 596 HasTTL bool 597 598 // TTL is the IPv4 Time To Live of the associated packet. 599 TTL uint8 600 601 // HasHopLimit indicates whether HopLimit is valid/set. 602 HasHopLimit bool 603 604 // HopLimit is the IPv6 Hop Limit of the associated packet. 605 HopLimit uint8 606 607 // HasIPv6PacketInfo indicates whether IPv6PacketInfo is set. 608 HasIPv6PacketInfo bool 609 610 // IPv6PacketInfo holds interface and address data on an incoming packet. 611 IPv6PacketInfo IPv6PacketInfo 612 } 613 614 // ReceivableControlMessages contains socket control messages that can be 615 // received. 616 // 617 // +stateify savable 618 type ReceivableControlMessages struct { 619 // Timestamp is the time that the last packet used to create the read data 620 // was received. 621 Timestamp time.Time `state:".(int64)"` 622 623 // HasInq indicates whether Inq is valid/set. 624 HasInq bool 625 626 // Inq is the number of bytes ready to be received. 627 Inq int32 628 629 // HasTOS indicates whether TOS is valid/set. 
630 HasTOS bool 631 632 // TOS is the IPv4 type of service of the associated packet. 633 TOS uint8 634 635 // HasTTL indicates whether TTL is valid/set. 636 HasTTL bool 637 638 // TTL is the IPv4 Time To Live of the associated packet. 639 TTL uint8 640 641 // HasHopLimit indicates whether HopLimit is valid/set. 642 HasHopLimit bool 643 644 // HopLimit is the IPv6 Hop Limit of the associated packet. 645 HopLimit uint8 646 647 // HasTimestamp indicates whether Timestamp is valid/set. 648 HasTimestamp bool 649 650 // HasTClass indicates whether TClass is valid/set. 651 HasTClass bool 652 653 // TClass is the IPv6 traffic class of the associated packet. 654 TClass uint32 655 656 // HasIPPacketInfo indicates whether PacketInfo is set. 657 HasIPPacketInfo bool 658 659 // PacketInfo holds interface and address data on an incoming packet. 660 PacketInfo IPPacketInfo 661 662 // HasIPv6PacketInfo indicates whether IPv6PacketInfo is set. 663 HasIPv6PacketInfo bool 664 665 // IPv6PacketInfo holds interface and address data on an incoming packet. 666 IPv6PacketInfo IPv6PacketInfo 667 668 // HasOriginalDestinationAddress indicates whether OriginalDstAddress is 669 // set. 670 HasOriginalDstAddress bool 671 672 // OriginalDestinationAddress holds the original destination address 673 // and port of the incoming packet. 674 OriginalDstAddress FullAddress 675 676 // SockErr is the dequeued socket error on recvmsg(MSG_ERRQUEUE). 677 SockErr *SockError 678 } 679 680 // PacketOwner is used to get UID and GID of the packet. 681 type PacketOwner interface { 682 // KUID returns KUID of the packet. 683 KUID() uint32 684 685 // KGID returns KGID of the packet. 686 KGID() uint32 687 } 688 689 // ReadOptions contains options for Endpoint.Read. 690 type ReadOptions struct { 691 // Peek indicates whether this read is a peek. 692 Peek bool 693 694 // NeedRemoteAddr indicates whether to return the remote address, if 695 // supported. 
696 NeedRemoteAddr bool 697 698 // NeedLinkPacketInfo indicates whether to return the link-layer information, 699 // if supported. 700 NeedLinkPacketInfo bool 701 } 702 703 // ReadResult represents result for a successful Endpoint.Read. 704 type ReadResult struct { 705 // Count is the number of bytes received and written to the buffer. 706 Count int 707 708 // Total is the number of bytes of the received packet. This can be used to 709 // determine whether the read is truncated. 710 Total int 711 712 // ControlMessages is the control messages received. 713 ControlMessages ReceivableControlMessages 714 715 // RemoteAddr is the remote address if ReadOptions.NeedAddr is true. 716 RemoteAddr FullAddress 717 718 // LinkPacketInfo is the link-layer information of the received packet if 719 // ReadOptions.NeedLinkPacketInfo is true. 720 LinkPacketInfo LinkPacketInfo 721 } 722 723 // Endpoint is the interface implemented by transport protocols (e.g., tcp, udp) 724 // that exposes functionality like read, write, connect, etc. to users of the 725 // networking stack. 726 type Endpoint interface { 727 // Close puts the endpoint in a closed state and frees all resources 728 // associated with it. Close initiates the teardown process, the 729 // Endpoint may not be fully closed when Close returns. 730 Close() 731 732 // Abort initiates an expedited endpoint teardown. As compared to 733 // Close, Abort prioritizes closing the Endpoint quickly over cleanly. 734 // Abort is best effort; implementing Abort with Close is acceptable. 735 Abort() 736 737 // Read reads data from the endpoint and optionally writes to dst. 738 // 739 // This method does not block if there is no data pending; in this case, 740 // ErrWouldBlock is returned. 741 // 742 // If non-zero number of bytes are successfully read and written to dst, err 743 // must be nil. Otherwise, if dst failed to write anything, ErrBadBuffer 744 // should be returned. 
745 Read(io.Writer, ReadOptions) (ReadResult, Error) 746 747 // Write writes data to the endpoint's peer. This method does not block if 748 // the data cannot be written. 749 // 750 // Unlike io.Writer.Write, Endpoint.Write transfers ownership of any bytes 751 // successfully written to the Endpoint. That is, if a call to 752 // Write(SlicePayload{data}) returns (n, err), it may retain data[:n], and 753 // the caller should not use data[:n] after Write returns. 754 // 755 // Note that unlike io.Writer.Write, it is not an error for Write to 756 // perform a partial write (if n > 0, no error may be returned). Only 757 // stream (TCP) Endpoints may return partial writes, and even then only 758 // in the case where writing additional data would block. Other Endpoints 759 // will either write the entire message or return an error. 760 Write(Payloader, WriteOptions) (int64, Error) 761 762 // Connect connects the endpoint to its peer. Specifying a NIC is 763 // optional. 764 // 765 // There are three classes of return values: 766 // nil -- the attempt to connect succeeded. 767 // ErrConnectStarted/ErrAlreadyConnecting -- the connect attempt started 768 // but hasn't completed yet. In this case, the caller must call Connect 769 // or GetSockOpt(ErrorOption) when the endpoint becomes writable to 770 // get the actual result. The first call to Connect after the socket has 771 // connected returns nil. Calling connect again results in ErrAlreadyConnected. 772 // Anything else -- the attempt to connect failed. 773 // 774 // If address.Addr is empty, this means that Endpoint has to be 775 // disconnected if this is supported, otherwise 776 // ErrAddressFamilyNotSupported must be returned. 777 Connect(address FullAddress) Error 778 779 // Disconnect disconnects the endpoint from its peer. 780 Disconnect() Error 781 782 // Shutdown closes the read and/or write end of the endpoint connection 783 // to its peer. 
784 Shutdown(flags ShutdownFlags) Error 785 786 // Listen puts the endpoint in "listen" mode, which allows it to accept 787 // new connections. 788 Listen(backlog int) Error 789 790 // Accept returns a new endpoint if a peer has established a connection 791 // to an endpoint previously set to listen mode. This method does not 792 // block if no new connections are available. 793 // 794 // The returned Queue is the wait queue for the newly created endpoint. 795 // 796 // If peerAddr is not nil then it is populated with the peer address of the 797 // returned endpoint. 798 Accept(peerAddr *FullAddress) (Endpoint, *waiter.Queue, Error) 799 800 // Bind binds the endpoint to a specific local address and port. 801 // Specifying a NIC is optional. 802 Bind(address FullAddress) Error 803 804 // GetLocalAddress returns the address to which the endpoint is bound. 805 GetLocalAddress() (FullAddress, Error) 806 807 // GetRemoteAddress returns the address to which the endpoint is 808 // connected. 809 GetRemoteAddress() (FullAddress, Error) 810 811 // Readiness returns the current readiness of the endpoint. For example, 812 // if waiter.EventIn is set, the endpoint is immediately readable. 813 Readiness(mask waiter.EventMask) waiter.EventMask 814 815 // SetSockOpt sets a socket option. 816 SetSockOpt(opt SettableSocketOption) Error 817 818 // SetSockOptInt sets a socket option, for simple cases where a value 819 // has the int type. 820 SetSockOptInt(opt SockOptInt, v int) Error 821 822 // GetSockOpt gets a socket option. 823 GetSockOpt(opt GettableSocketOption) Error 824 825 // GetSockOptInt gets a socket option for simple cases where a return 826 // value has the int type. 827 GetSockOptInt(SockOptInt) (int, Error) 828 829 // State returns a socket's lifecycle state. The returned value is 830 // protocol-specific and is primarily used for diagnostics. 831 State() uint32 832 833 // ModerateRecvBuf should be called everytime data is copied to the user 834 // space. 
This allows for dynamic tuning of recv buffer space for a 835 // given socket. 836 // 837 // NOTE: This method is a no-op for sockets other than TCP. 838 ModerateRecvBuf(copied int) 839 840 // Info returns a copy to the transport endpoint info. 841 Info() EndpointInfo 842 843 // Stats returns a reference to the endpoint stats. 844 Stats() EndpointStats 845 846 // SetOwner sets the task owner to the endpoint owner. 847 SetOwner(owner PacketOwner) 848 849 // LastError clears and returns the last error reported by the endpoint. 850 LastError() Error 851 852 // SocketOptions returns the structure which contains all the socket 853 // level options. 854 SocketOptions() *SocketOptions 855 } 856 857 // EndpointWithPreflight is the interface implemented by endpoints that need 858 // to expose the `Preflight` method for preparing the endpoint prior to 859 // calling `Write`. 860 type EndpointWithPreflight interface { 861 // Prepares the endpoint for writes using the provided WriteOptions, 862 // returning an error if the options were incompatible with the endpoint's 863 // current state. 864 Preflight(WriteOptions) Error 865 } 866 867 // LinkPacketInfo holds Link layer information for a received packet. 868 // 869 // +stateify savable 870 type LinkPacketInfo struct { 871 // Protocol is the NetworkProtocolNumber for the packet. 872 Protocol NetworkProtocolNumber 873 874 // PktType is used to indicate the destination of the packet. 875 PktType PacketType 876 } 877 878 // EndpointInfo is the interface implemented by each endpoint info struct. 879 type EndpointInfo interface { 880 // IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo 881 // marker interface. 882 IsEndpointInfo() 883 } 884 885 // EndpointStats is the interface implemented by each endpoint stats struct. 886 type EndpointStats interface { 887 // IsEndpointStats is an empty method to implement the tcpip.EndpointStats 888 // marker interface. 
889 IsEndpointStats() 890 } 891 892 // WriteOptions contains options for Endpoint.Write. 893 type WriteOptions struct { 894 // If To is not nil, write to the given address instead of the endpoint's 895 // peer. 896 To *FullAddress 897 898 // More has the same semantics as Linux's MSG_MORE. 899 More bool 900 901 // EndOfRecord has the same semantics as Linux's MSG_EOR. 902 EndOfRecord bool 903 904 // Atomic means that all data fetched from Payloader must be written to the 905 // endpoint. If Atomic is false, then data fetched from the Payloader may be 906 // discarded if available endpoint buffer space is insufficient. 907 Atomic bool 908 909 // ControlMessages contains optional overrides used when writing a packet. 910 ControlMessages SendableControlMessages 911 } 912 913 // SockOptInt represents socket options which values have the int type. 914 type SockOptInt int 915 916 const ( 917 // KeepaliveCountOption is used by SetSockOptInt/GetSockOptInt to 918 // specify the number of un-ACKed TCP keepalives that will be sent 919 // before the connection is closed. 920 KeepaliveCountOption SockOptInt = iota 921 922 // IPv4TOSOption is used by SetSockOptInt/GetSockOptInt to specify TOS 923 // for all subsequent outgoing IPv4 packets from the endpoint. 924 IPv4TOSOption 925 926 // IPv6TrafficClassOption is used by SetSockOptInt/GetSockOptInt to 927 // specify TOS for all subsequent outgoing IPv6 packets from the 928 // endpoint. 929 IPv6TrafficClassOption 930 931 // MaxSegOption is used by SetSockOptInt/GetSockOptInt to set/get the 932 // current Maximum Segment Size(MSS) value as specified using the 933 // TCP_MAXSEG option. 934 MaxSegOption 935 936 // MTUDiscoverOption is used to set/get the path MTU discovery setting. 937 // 938 // NOTE: Setting this option to any other value than PMTUDiscoveryDont 939 // is not supported and will fail as such, and getting this option will 940 // always return PMTUDiscoveryDont. 
941 MTUDiscoverOption 942 943 // MulticastTTLOption is used by SetSockOptInt/GetSockOptInt to control 944 // the default TTL value for multicast messages. The default is 1. 945 MulticastTTLOption 946 947 // ReceiveQueueSizeOption is used in GetSockOptInt to specify that the 948 // number of unread bytes in the input buffer should be returned. 949 ReceiveQueueSizeOption 950 951 // SendQueueSizeOption is used in GetSockOptInt to specify that the 952 // number of unread bytes in the output buffer should be returned. 953 SendQueueSizeOption 954 955 // IPv4TTLOption is used by SetSockOptInt/GetSockOptInt to control the default 956 // TTL value for unicast messages. 957 // 958 // The default is configured by DefaultTTLOption. A UseDefaultIPv4TTL value 959 // configures the endpoint to use the default. 960 IPv4TTLOption 961 962 // IPv6HopLimitOption is used by SetSockOptInt/GetSockOptInt to control the 963 // default hop limit value for unicast messages. 964 // 965 // The default is configured by DefaultTTLOption. A UseDefaultIPv6HopLimit 966 // value configures the endpoint to use the default. 967 IPv6HopLimitOption 968 969 // TCPSynCountOption is used by SetSockOptInt/GetSockOptInt to specify 970 // the number of SYN retransmits that TCP should send before aborting 971 // the attempt to connect. It cannot exceed 255. 972 // 973 // NOTE: This option is currently only stubbed out and is no-op. 974 TCPSynCountOption 975 976 // TCPWindowClampOption is used by SetSockOptInt/GetSockOptInt to bound 977 // the size of the advertised window to this value. 978 // 979 // NOTE: This option is currently only stubed out and is a no-op 980 TCPWindowClampOption 981 982 // IPv6Checksum is used to request the stack to populate and validate the IPv6 983 // checksum for transport level headers. 
984 IPv6Checksum 985 ) 986 987 const ( 988 // UseDefaultIPv4TTL is the IPv4TTLOption value that configures an endpoint to 989 // use the default ttl currently configured by the IPv4 protocol (see 990 // DefaultTTLOption). 991 UseDefaultIPv4TTL = 0 992 993 // UseDefaultIPv6HopLimit is the IPv6HopLimitOption value that configures an 994 // endpoint to use the default hop limit currently configured by the IPv6 995 // protocol (see DefaultTTLOption). 996 UseDefaultIPv6HopLimit = -1 997 ) 998 999 const ( 1000 // PMTUDiscoveryWant is a setting of the MTUDiscoverOption to use 1001 // per-route settings. 1002 PMTUDiscoveryWant int = iota 1003 1004 // PMTUDiscoveryDont is a setting of the MTUDiscoverOption to disable 1005 // path MTU discovery. 1006 PMTUDiscoveryDont 1007 1008 // PMTUDiscoveryDo is a setting of the MTUDiscoverOption to always do 1009 // path MTU discovery. 1010 PMTUDiscoveryDo 1011 1012 // PMTUDiscoveryProbe is a setting of the MTUDiscoverOption to set DF 1013 // but ignore path MTU. 1014 PMTUDiscoveryProbe 1015 ) 1016 1017 // GettableNetworkProtocolOption is a marker interface for network protocol 1018 // options that may be queried. 1019 type GettableNetworkProtocolOption interface { 1020 isGettableNetworkProtocolOption() 1021 } 1022 1023 // SettableNetworkProtocolOption is a marker interface for network protocol 1024 // options that may be set. 1025 type SettableNetworkProtocolOption interface { 1026 isSettableNetworkProtocolOption() 1027 } 1028 1029 // DefaultTTLOption is used by stack.(*Stack).NetworkProtocolOption to specify 1030 // a default TTL. 1031 type DefaultTTLOption uint8 1032 1033 func (*DefaultTTLOption) isGettableNetworkProtocolOption() {} 1034 1035 func (*DefaultTTLOption) isSettableNetworkProtocolOption() {} 1036 1037 // GettableTransportProtocolOption is a marker interface for transport protocol 1038 // options that may be queried. 
type GettableTransportProtocolOption interface {
	isGettableTransportProtocolOption()
}

// SettableTransportProtocolOption is a marker interface for transport protocol
// options that may be set.
type SettableTransportProtocolOption interface {
	isSettableTransportProtocolOption()
}

// TCPSACKEnabled indicates whether the SACK option is enabled for TCP.
//
// See: https://tools.ietf.org/html/rfc2018.
type TCPSACKEnabled bool

func (*TCPSACKEnabled) isGettableTransportProtocolOption() {}

func (*TCPSACKEnabled) isSettableTransportProtocolOption() {}

// TCPRecovery is the loss detection algorithm used by TCP.
//
// Its values are bit flags (see the TCPRACK* constants) and may be OR-ed
// together.
type TCPRecovery int32

func (*TCPRecovery) isGettableTransportProtocolOption() {}

func (*TCPRecovery) isSettableTransportProtocolOption() {}

// TCPAlwaysUseSynCookies indicates unconditional usage of syncookies.
type TCPAlwaysUseSynCookies bool

func (*TCPAlwaysUseSynCookies) isGettableTransportProtocolOption() {}

func (*TCPAlwaysUseSynCookies) isSettableTransportProtocolOption() {}

const (
	// TCPRACKLossDetection indicates RACK is used for loss detection and
	// recovery.
	TCPRACKLossDetection TCPRecovery = 1 << iota

	// TCPRACKStaticReoWnd indicates the reordering window should not be
	// adjusted when DSACK is received.
	TCPRACKStaticReoWnd

	// TCPRACKNoDupTh indicates RACK should not consider the classic three
	// duplicate acknowledgements rule to mark the segments as lost. This
	// is used when reordering is not detected.
	TCPRACKNoDupTh
)

// TCPDelayEnabled enables/disables Nagle's algorithm in TCP.
type TCPDelayEnabled bool

func (*TCPDelayEnabled) isGettableTransportProtocolOption() {}

func (*TCPDelayEnabled) isSettableTransportProtocolOption() {}

// TCPSendBufferSizeRangeOption is the send buffer size range for TCP.
type TCPSendBufferSizeRangeOption struct {
	Min     int
	Default int
	Max     int
}

func (*TCPSendBufferSizeRangeOption) isGettableTransportProtocolOption() {}

func (*TCPSendBufferSizeRangeOption) isSettableTransportProtocolOption() {}

// TCPReceiveBufferSizeRangeOption is the receive buffer size range for TCP.
type TCPReceiveBufferSizeRangeOption struct {
	Min     int
	Default int
	Max     int
}

func (*TCPReceiveBufferSizeRangeOption) isGettableTransportProtocolOption() {}

func (*TCPReceiveBufferSizeRangeOption) isSettableTransportProtocolOption() {}

// TCPAvailableCongestionControlOption is the supported congestion control
// algorithms for TCP.
type TCPAvailableCongestionControlOption string

func (*TCPAvailableCongestionControlOption) isGettableTransportProtocolOption() {}

func (*TCPAvailableCongestionControlOption) isSettableTransportProtocolOption() {}

// TCPModerateReceiveBufferOption enables/disables receive buffer moderation
// for TCP.
type TCPModerateReceiveBufferOption bool

func (*TCPModerateReceiveBufferOption) isGettableTransportProtocolOption() {}

func (*TCPModerateReceiveBufferOption) isSettableTransportProtocolOption() {}

// GettableSocketOption is a marker interface for socket options that may be
// queried.
type GettableSocketOption interface {
	isGettableSocketOption()
}

// SettableSocketOption is a marker interface for socket options that may be
// configured.
type SettableSocketOption interface {
	isSettableSocketOption()
}

// ICMPv6Filter specifies a filter for ICMPv6 types.
//
// +stateify savable
type ICMPv6Filter struct {
	// DenyType indicates if an ICMP type should be blocked.
	//
	// The ICMPv6 type field is 8 bits so there are up to 256 different ICMPv6
	// types. Each type maps to one bit: 8 words of 32 bits cover all 256.
	DenyType [8]uint32
}

// ShouldDeny returns true iff the ICMPv6 Type should be denied.
//
// The type is looked up in the DenyType bitmap: icmpType/32 selects the word
// and icmpType%32 selects the bit within that word.
func (f *ICMPv6Filter) ShouldDeny(icmpType uint8) bool {
	const bitsInUint32 = 32
	// A uint8 divided by 32 is at most 7, so the index is always in range.
	i := icmpType / bitsInUint32
	b := icmpType % bitsInUint32
	return f.DenyType[i]&(1<<b) != 0
}

func (*ICMPv6Filter) isGettableSocketOption() {}

func (*ICMPv6Filter) isSettableSocketOption() {}

// EndpointState represents the state of an endpoint.
type EndpointState uint8

// CongestionControlState indicates the current congestion control state for
// TCP sender.
type CongestionControlState int

const (
	// Open indicates that the sender is receiving acks in order and
	// no loss or dupACK's etc have been detected.
	Open CongestionControlState = iota
	// RTORecovery indicates that an RTO has occurred and the sender
	// has entered an RTO based recovery phase.
	RTORecovery
	// FastRecovery indicates that the sender has entered FastRecovery
	// based on receiving nDupAck's. This state is entered only when
	// SACK is not in use.
	FastRecovery
	// SACKRecovery indicates that the sender has entered SACK based
	// recovery.
	SACKRecovery
	// Disorder indicates the sender either received some SACK blocks
	// or dupACK's.
	Disorder
)

// TCPInfoOption is used by GetSockOpt to expose TCP statistics.
//
// TODO(b/64800844): Add and populate stat fields.
type TCPInfoOption struct {
	// RTT is the smoothed round trip time.
	RTT time.Duration

	// RTTVar is the round trip time variation.
	RTTVar time.Duration

	// RTO is the retransmission timeout for the endpoint.
	RTO time.Duration

	// State is the current endpoint protocol state.
	State EndpointState

	// CcState is the congestion control state.
	CcState CongestionControlState

	// SndCwnd is the congestion window, in packets.
	SndCwnd uint32

	// SndSsthresh is the threshold between slow start and congestion
	// avoidance.
	SndSsthresh uint32

	// ReorderSeen indicates if reordering is seen in the endpoint.
	ReorderSeen bool
}

func (*TCPInfoOption) isGettableSocketOption() {}

// KeepaliveIdleOption is used by SetSockOpt/GetSockOpt to specify the time a
// connection must remain idle before the first TCP keepalive packet is sent.
// Once this time is reached, KeepaliveIntervalOption is used instead.
type KeepaliveIdleOption time.Duration

func (*KeepaliveIdleOption) isGettableSocketOption() {}

func (*KeepaliveIdleOption) isSettableSocketOption() {}

// KeepaliveIntervalOption is used by SetSockOpt/GetSockOpt to specify the
// interval between sending TCP keepalive packets.
type KeepaliveIntervalOption time.Duration

func (*KeepaliveIntervalOption) isGettableSocketOption() {}

func (*KeepaliveIntervalOption) isSettableSocketOption() {}

// TCPUserTimeoutOption is used by SetSockOpt/GetSockOpt to specify a user
// specified timeout for a given TCP connection.
// See: RFC5482 for details.
type TCPUserTimeoutOption time.Duration

func (*TCPUserTimeoutOption) isGettableSocketOption() {}

func (*TCPUserTimeoutOption) isSettableSocketOption() {}

// CongestionControlOption is used by SetSockOpt/GetSockOpt to set/get
// the current congestion control algorithm.
type CongestionControlOption string

func (*CongestionControlOption) isGettableSocketOption() {}

func (*CongestionControlOption) isSettableSocketOption() {}

func (*CongestionControlOption) isGettableTransportProtocolOption() {}

func (*CongestionControlOption) isSettableTransportProtocolOption() {}

// TCPLingerTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum duration for which a socket lingers in the TCP_FIN_WAIT_2 state
// before being marked closed.
type TCPLingerTimeoutOption time.Duration

func (*TCPLingerTimeoutOption) isGettableSocketOption() {}

func (*TCPLingerTimeoutOption) isSettableSocketOption() {}

func (*TCPLingerTimeoutOption) isGettableTransportProtocolOption() {}

func (*TCPLingerTimeoutOption) isSettableTransportProtocolOption() {}

// TCPTimeWaitTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum duration for which a socket lingers in the TIME_WAIT state
// before being marked closed.
type TCPTimeWaitTimeoutOption time.Duration

func (*TCPTimeWaitTimeoutOption) isGettableSocketOption() {}

func (*TCPTimeWaitTimeoutOption) isSettableSocketOption() {}

func (*TCPTimeWaitTimeoutOption) isGettableTransportProtocolOption() {}

func (*TCPTimeWaitTimeoutOption) isSettableTransportProtocolOption() {}

// TCPDeferAcceptOption is used by SetSockOpt/GetSockOpt to allow an
// accept to return a completed connection only when there is data to be
// read. This usually means the listening socket will drop the final ACK
// for a handshake till the specified timeout until a segment with data arrives.
type TCPDeferAcceptOption time.Duration

func (*TCPDeferAcceptOption) isGettableSocketOption() {}

func (*TCPDeferAcceptOption) isSettableSocketOption() {}

// TCPMinRTOOption is used by SetSockOpt/GetSockOpt to allow overriding
// default MinRTO used by the Stack.
type TCPMinRTOOption time.Duration

func (*TCPMinRTOOption) isGettableSocketOption() {}

func (*TCPMinRTOOption) isSettableSocketOption() {}

func (*TCPMinRTOOption) isGettableTransportProtocolOption() {}

func (*TCPMinRTOOption) isSettableTransportProtocolOption() {}

// TCPMaxRTOOption is used by SetSockOpt/GetSockOpt to allow overriding
// default MaxRTO used by the Stack.
type TCPMaxRTOOption time.Duration

func (*TCPMaxRTOOption) isGettableSocketOption() {}

func (*TCPMaxRTOOption) isSettableSocketOption() {}

func (*TCPMaxRTOOption) isGettableTransportProtocolOption() {}

func (*TCPMaxRTOOption) isSettableTransportProtocolOption() {}

// TCPMaxRetriesOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum number of retransmits after which we time out the connection.
type TCPMaxRetriesOption uint64

func (*TCPMaxRetriesOption) isGettableSocketOption() {}

func (*TCPMaxRetriesOption) isSettableSocketOption() {}

func (*TCPMaxRetriesOption) isGettableTransportProtocolOption() {}

func (*TCPMaxRetriesOption) isSettableTransportProtocolOption() {}

// TCPSynRetriesOption is used by SetSockOpt/GetSockOpt to specify stack-wide
// default for number of times SYN is retransmitted before aborting a connect.
1337 type TCPSynRetriesOption uint8 1338 1339 func (*TCPSynRetriesOption) isGettableSocketOption() {} 1340 1341 func (*TCPSynRetriesOption) isSettableSocketOption() {} 1342 1343 func (*TCPSynRetriesOption) isGettableTransportProtocolOption() {} 1344 1345 func (*TCPSynRetriesOption) isSettableTransportProtocolOption() {} 1346 1347 // MulticastInterfaceOption is used by SetSockOpt/GetSockOpt to specify a 1348 // default interface for multicast. 1349 type MulticastInterfaceOption struct { 1350 NIC NICID 1351 InterfaceAddr Address 1352 } 1353 1354 func (*MulticastInterfaceOption) isGettableSocketOption() {} 1355 1356 func (*MulticastInterfaceOption) isSettableSocketOption() {} 1357 1358 // MembershipOption is used to identify a multicast membership on an interface. 1359 type MembershipOption struct { 1360 NIC NICID 1361 InterfaceAddr Address 1362 MulticastAddr Address 1363 } 1364 1365 // AddMembershipOption identifies a multicast group to join on some interface. 1366 type AddMembershipOption MembershipOption 1367 1368 func (*AddMembershipOption) isSettableSocketOption() {} 1369 1370 // RemoveMembershipOption identifies a multicast group to leave on some 1371 // interface. 1372 type RemoveMembershipOption MembershipOption 1373 1374 func (*RemoveMembershipOption) isSettableSocketOption() {} 1375 1376 // SocketDetachFilterOption is used by SetSockOpt to detach a previously attached 1377 // classic BPF filter on a given endpoint. 1378 type SocketDetachFilterOption int 1379 1380 func (*SocketDetachFilterOption) isSettableSocketOption() {} 1381 1382 // OriginalDestinationOption is used to get the original destination address 1383 // and port of a redirected packet. 
1384 type OriginalDestinationOption FullAddress 1385 1386 func (*OriginalDestinationOption) isGettableSocketOption() {} 1387 1388 // TCPTimeWaitReuseOption is used stack.(*Stack).TransportProtocolOption to 1389 // specify if the stack can reuse the port bound by an endpoint in TIME-WAIT for 1390 // new connections when it is safe from protocol viewpoint. 1391 type TCPTimeWaitReuseOption uint8 1392 1393 func (*TCPTimeWaitReuseOption) isGettableSocketOption() {} 1394 1395 func (*TCPTimeWaitReuseOption) isSettableSocketOption() {} 1396 1397 func (*TCPTimeWaitReuseOption) isGettableTransportProtocolOption() {} 1398 1399 func (*TCPTimeWaitReuseOption) isSettableTransportProtocolOption() {} 1400 1401 const ( 1402 // TCPTimeWaitReuseDisabled indicates reuse of port bound by endpoints in TIME-WAIT cannot 1403 // be reused for new connections. 1404 TCPTimeWaitReuseDisabled TCPTimeWaitReuseOption = iota 1405 1406 // TCPTimeWaitReuseGlobal indicates reuse of port bound by endpoints in TIME-WAIT can 1407 // be reused for new connections irrespective of the src/dest addresses. 1408 TCPTimeWaitReuseGlobal 1409 1410 // TCPTimeWaitReuseLoopbackOnly indicates reuse of port bound by endpoint in TIME-WAIT can 1411 // only be reused if the connection was a connection over loopback. i.e src/dest addresses 1412 // are loopback addresses. 1413 TCPTimeWaitReuseLoopbackOnly 1414 ) 1415 1416 // LingerOption is used by SetSockOpt/GetSockOpt to set/get the 1417 // duration for which a socket lingers before returning from Close. 1418 // 1419 // +marshal 1420 // +stateify savable 1421 type LingerOption struct { 1422 Enabled bool 1423 Timeout time.Duration 1424 } 1425 1426 // IPPacketInfo is the message structure for IP_PKTINFO. 1427 // 1428 // +stateify savable 1429 type IPPacketInfo struct { 1430 // NIC is the ID of the NIC to be used. 1431 NIC NICID 1432 1433 // LocalAddr is the local address. 
1434 LocalAddr Address 1435 1436 // DestinationAddr is the destination address found in the IP header. 1437 DestinationAddr Address 1438 } 1439 1440 // IPv6PacketInfo is the message structure for IPV6_PKTINFO. 1441 // 1442 // +stateify savable 1443 type IPv6PacketInfo struct { 1444 Addr Address 1445 NIC NICID 1446 } 1447 1448 // SendBufferSizeOption is used by stack.(Stack*).Option/SetOption to 1449 // get/set the default, min and max send buffer sizes. 1450 type SendBufferSizeOption struct { 1451 // Min is the minimum size for send buffer. 1452 Min int 1453 1454 // Default is the default size for send buffer. 1455 Default int 1456 1457 // Max is the maximum size for send buffer. 1458 Max int 1459 } 1460 1461 // ReceiveBufferSizeOption is used by stack.(Stack*).Option/SetOption to 1462 // get/set the default, min and max receive buffer sizes. 1463 type ReceiveBufferSizeOption struct { 1464 // Min is the minimum size for send buffer. 1465 Min int 1466 1467 // Default is the default size for send buffer. 1468 Default int 1469 1470 // Max is the maximum size for send buffer. 1471 Max int 1472 } 1473 1474 // GetSendBufferLimits is used to get the send buffer size limits. 1475 type GetSendBufferLimits func(StackHandler) SendBufferSizeOption 1476 1477 // GetStackSendBufferLimits is used to get default, min and max send buffer size. 1478 func GetStackSendBufferLimits(so StackHandler) SendBufferSizeOption { 1479 var ss SendBufferSizeOption 1480 if err := so.Option(&ss); err != nil { 1481 panic(fmt.Sprintf("s.Option(%#v) = %s", ss, err)) 1482 } 1483 return ss 1484 } 1485 1486 // GetReceiveBufferLimits is used to get the send buffer size limits. 1487 type GetReceiveBufferLimits func(StackHandler) ReceiveBufferSizeOption 1488 1489 // GetStackReceiveBufferLimits is used to get default, min and max send buffer size. 
1490 func GetStackReceiveBufferLimits(so StackHandler) ReceiveBufferSizeOption { 1491 var ss ReceiveBufferSizeOption 1492 if err := so.Option(&ss); err != nil { 1493 panic(fmt.Sprintf("s.Option(%#v) = %s", ss, err)) 1494 } 1495 return ss 1496 } 1497 1498 // Route is a row in the routing table. It specifies through which NIC (and 1499 // gateway) sets of packets should be routed. A row is considered viable if the 1500 // masked target address matches the destination address in the row. 1501 type Route struct { 1502 // Destination must contain the target address for this row to be viable. 1503 Destination Subnet 1504 1505 // Gateway is the gateway to be used if this row is viable. 1506 Gateway Address 1507 1508 // NIC is the id of the nic to be used if this row is viable. 1509 NIC NICID 1510 1511 // SourceHint indicates a preferred source address to use when NICs 1512 // have multiple addresses. 1513 SourceHint Address 1514 } 1515 1516 // String implements the fmt.Stringer interface. 1517 func (r Route) String() string { 1518 var out strings.Builder 1519 _, _ = fmt.Fprintf(&out, "%s", r.Destination) 1520 if r.Gateway.length > 0 { 1521 _, _ = fmt.Fprintf(&out, " via %s", r.Gateway) 1522 } 1523 _, _ = fmt.Fprintf(&out, " nic %d", r.NIC) 1524 return out.String() 1525 } 1526 1527 // Equal returns true if the given Route is equal to this Route. 1528 func (r Route) Equal(to Route) bool { 1529 // NOTE: This relies on the fact that r.Destination == to.Destination 1530 return r.Destination.Equal(to.Destination) && r.Gateway == to.Gateway && r.NIC == to.NIC 1531 } 1532 1533 // TransportProtocolNumber is the number of a transport protocol. 1534 type TransportProtocolNumber uint32 1535 1536 // NetworkProtocolNumber is the EtherType of a network protocol in an Ethernet 1537 // frame. 1538 // 1539 // See: https://www.iana.org/assignments/ieee-802-numbers/ieee-802-numbers.xhtml 1540 type NetworkProtocolNumber uint32 1541 1542 // A StatCounter keeps track of a statistic. 
1543 // 1544 // +stateify savable 1545 type StatCounter struct { 1546 count atomicbitops.Uint64 1547 } 1548 1549 // Increment adds one to the counter. 1550 func (s *StatCounter) Increment() { 1551 s.IncrementBy(1) 1552 } 1553 1554 // Decrement minuses one to the counter. 1555 func (s *StatCounter) Decrement() { 1556 s.IncrementBy(^uint64(0)) 1557 } 1558 1559 // Value returns the current value of the counter. 1560 func (s *StatCounter) Value() uint64 { 1561 return s.count.Load() 1562 } 1563 1564 // IncrementBy increments the counter by v. 1565 func (s *StatCounter) IncrementBy(v uint64) { 1566 s.count.Add(v) 1567 } 1568 1569 func (s *StatCounter) String() string { 1570 return strconv.FormatUint(s.Value(), 10) 1571 } 1572 1573 // A MultiCounterStat keeps track of two counters at once. 1574 type MultiCounterStat struct { 1575 a *StatCounter 1576 b *StatCounter 1577 } 1578 1579 // Init sets both internal counters to point to a and b. 1580 func (m *MultiCounterStat) Init(a, b *StatCounter) { 1581 m.a = a 1582 m.b = b 1583 } 1584 1585 // Increment adds one to the counters. 1586 func (m *MultiCounterStat) Increment() { 1587 m.a.Increment() 1588 m.b.Increment() 1589 } 1590 1591 // IncrementBy increments the counters by v. 1592 func (m *MultiCounterStat) IncrementBy(v uint64) { 1593 m.a.IncrementBy(v) 1594 m.b.IncrementBy(v) 1595 } 1596 1597 // ICMPv4PacketStats enumerates counts for all ICMPv4 packet types. 1598 type ICMPv4PacketStats struct { 1599 // LINT.IfChange(ICMPv4PacketStats) 1600 1601 // EchoRequest is the number of ICMPv4 echo packets counted. 1602 EchoRequest *StatCounter 1603 1604 // EchoReply is the number of ICMPv4 echo reply packets counted. 1605 EchoReply *StatCounter 1606 1607 // DstUnreachable is the number of ICMPv4 destination unreachable packets 1608 // counted. 1609 DstUnreachable *StatCounter 1610 1611 // SrcQuench is the number of ICMPv4 source quench packets counted. 
1612 SrcQuench *StatCounter 1613 1614 // Redirect is the number of ICMPv4 redirect packets counted. 1615 Redirect *StatCounter 1616 1617 // TimeExceeded is the number of ICMPv4 time exceeded packets counted. 1618 TimeExceeded *StatCounter 1619 1620 // ParamProblem is the number of ICMPv4 parameter problem packets counted. 1621 ParamProblem *StatCounter 1622 1623 // Timestamp is the number of ICMPv4 timestamp packets counted. 1624 Timestamp *StatCounter 1625 1626 // TimestampReply is the number of ICMPv4 timestamp reply packets counted. 1627 TimestampReply *StatCounter 1628 1629 // InfoRequest is the number of ICMPv4 information request packets counted. 1630 InfoRequest *StatCounter 1631 1632 // InfoReply is the number of ICMPv4 information reply packets counted. 1633 InfoReply *StatCounter 1634 1635 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4PacketStats) 1636 } 1637 1638 // ICMPv4SentPacketStats collects outbound ICMPv4-specific stats. 1639 type ICMPv4SentPacketStats struct { 1640 // LINT.IfChange(ICMPv4SentPacketStats) 1641 1642 ICMPv4PacketStats 1643 1644 // Dropped is the number of ICMPv4 packets dropped due to link layer errors. 1645 Dropped *StatCounter 1646 1647 // RateLimited is the number of ICMPv4 packets dropped due to rate limit being 1648 // exceeded. 1649 RateLimited *StatCounter 1650 1651 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4SentPacketStats) 1652 } 1653 1654 // ICMPv4ReceivedPacketStats collects inbound ICMPv4-specific stats. 1655 type ICMPv4ReceivedPacketStats struct { 1656 // LINT.IfChange(ICMPv4ReceivedPacketStats) 1657 1658 ICMPv4PacketStats 1659 1660 // Invalid is the number of invalid ICMPv4 packets received. 1661 Invalid *StatCounter 1662 1663 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4ReceivedPacketStats) 1664 } 1665 1666 // ICMPv4Stats collects ICMPv4-specific stats. 
1667 type ICMPv4Stats struct { 1668 // LINT.IfChange(ICMPv4Stats) 1669 1670 // PacketsSent contains statistics about sent packets. 1671 PacketsSent ICMPv4SentPacketStats 1672 1673 // PacketsReceived contains statistics about received packets. 1674 PacketsReceived ICMPv4ReceivedPacketStats 1675 1676 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4Stats) 1677 } 1678 1679 // ICMPv6PacketStats enumerates counts for all ICMPv6 packet types. 1680 type ICMPv6PacketStats struct { 1681 // LINT.IfChange(ICMPv6PacketStats) 1682 1683 // EchoRequest is the number of ICMPv6 echo request packets counted. 1684 EchoRequest *StatCounter 1685 1686 // EchoReply is the number of ICMPv6 echo reply packets counted. 1687 EchoReply *StatCounter 1688 1689 // DstUnreachable is the number of ICMPv6 destination unreachable packets 1690 // counted. 1691 DstUnreachable *StatCounter 1692 1693 // PacketTooBig is the number of ICMPv6 packet too big packets counted. 1694 PacketTooBig *StatCounter 1695 1696 // TimeExceeded is the number of ICMPv6 time exceeded packets counted. 1697 TimeExceeded *StatCounter 1698 1699 // ParamProblem is the number of ICMPv6 parameter problem packets counted. 1700 ParamProblem *StatCounter 1701 1702 // RouterSolicit is the number of ICMPv6 router solicit packets counted. 1703 RouterSolicit *StatCounter 1704 1705 // RouterAdvert is the number of ICMPv6 router advert packets counted. 1706 RouterAdvert *StatCounter 1707 1708 // NeighborSolicit is the number of ICMPv6 neighbor solicit packets counted. 1709 NeighborSolicit *StatCounter 1710 1711 // NeighborAdvert is the number of ICMPv6 neighbor advert packets counted. 1712 NeighborAdvert *StatCounter 1713 1714 // RedirectMsg is the number of ICMPv6 redirect message packets counted. 1715 RedirectMsg *StatCounter 1716 1717 // MulticastListenerQuery is the number of Multicast Listener Query messages 1718 // counted. 
1719 MulticastListenerQuery *StatCounter 1720 1721 // MulticastListenerReport is the number of Multicast Listener Report messages 1722 // counted. 1723 MulticastListenerReport *StatCounter 1724 1725 // MulticastListenerReportV2 is the number of Multicast Listener Report 1726 // messages counted. 1727 MulticastListenerReportV2 *StatCounter 1728 1729 // MulticastListenerDone is the number of Multicast Listener Done messages 1730 // counted. 1731 MulticastListenerDone *StatCounter 1732 1733 // LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6PacketStats) 1734 } 1735 1736 // ICMPv6SentPacketStats collects outbound ICMPv6-specific stats. 1737 type ICMPv6SentPacketStats struct { 1738 // LINT.IfChange(ICMPv6SentPacketStats) 1739 1740 ICMPv6PacketStats 1741 1742 // Dropped is the number of ICMPv6 packets dropped due to link layer errors. 1743 Dropped *StatCounter 1744 1745 // RateLimited is the number of ICMPv6 packets dropped due to rate limit being 1746 // exceeded. 1747 RateLimited *StatCounter 1748 1749 // LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6SentPacketStats) 1750 } 1751 1752 // ICMPv6ReceivedPacketStats collects inbound ICMPv6-specific stats. 1753 type ICMPv6ReceivedPacketStats struct { 1754 // LINT.IfChange(ICMPv6ReceivedPacketStats) 1755 1756 ICMPv6PacketStats 1757 1758 // Unrecognized is the number of ICMPv6 packets received that the transport 1759 // layer does not know how to parse. 1760 Unrecognized *StatCounter 1761 1762 // Invalid is the number of invalid ICMPv6 packets received. 1763 Invalid *StatCounter 1764 1765 // RouterOnlyPacketsDroppedByHost is the number of ICMPv6 packets dropped due 1766 // to being router-specific packets. 1767 RouterOnlyPacketsDroppedByHost *StatCounter 1768 1769 // LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6ReceivedPacketStats) 1770 } 1771 1772 // ICMPv6Stats collects ICMPv6-specific stats. 
1773 type ICMPv6Stats struct { 1774 // LINT.IfChange(ICMPv6Stats) 1775 1776 // PacketsSent contains statistics about sent packets. 1777 PacketsSent ICMPv6SentPacketStats 1778 1779 // PacketsReceived contains statistics about received packets. 1780 PacketsReceived ICMPv6ReceivedPacketStats 1781 1782 // LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6Stats) 1783 } 1784 1785 // ICMPStats collects ICMP-specific stats (both v4 and v6). 1786 type ICMPStats struct { 1787 // V4 contains the ICMPv4-specifics stats. 1788 V4 ICMPv4Stats 1789 1790 // V6 contains the ICMPv4-specifics stats. 1791 V6 ICMPv6Stats 1792 } 1793 1794 // IGMPPacketStats enumerates counts for all IGMP packet types. 1795 type IGMPPacketStats struct { 1796 // LINT.IfChange(IGMPPacketStats) 1797 1798 // MembershipQuery is the number of Membership Query messages counted. 1799 MembershipQuery *StatCounter 1800 1801 // V1MembershipReport is the number of Version 1 Membership Report messages 1802 // counted. 1803 V1MembershipReport *StatCounter 1804 1805 // V2MembershipReport is the number of Version 2 Membership Report messages 1806 // counted. 1807 V2MembershipReport *StatCounter 1808 1809 // V3MembershipReport is the number of Version 3 Membership Report messages 1810 // counted. 1811 V3MembershipReport *StatCounter 1812 1813 // LeaveGroup is the number of Leave Group messages counted. 1814 LeaveGroup *StatCounter 1815 1816 // LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPPacketStats) 1817 } 1818 1819 // IGMPSentPacketStats collects outbound IGMP-specific stats. 1820 type IGMPSentPacketStats struct { 1821 // LINT.IfChange(IGMPSentPacketStats) 1822 1823 IGMPPacketStats 1824 1825 // Dropped is the number of IGMP packets dropped. 1826 Dropped *StatCounter 1827 1828 // LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPSentPacketStats) 1829 } 1830 1831 // IGMPReceivedPacketStats collects inbound IGMP-specific stats. 
1832 type IGMPReceivedPacketStats struct { 1833 // LINT.IfChange(IGMPReceivedPacketStats) 1834 1835 IGMPPacketStats 1836 1837 // Invalid is the number of invalid IGMP packets received. 1838 Invalid *StatCounter 1839 1840 // ChecksumErrors is the number of IGMP packets dropped due to bad checksums. 1841 ChecksumErrors *StatCounter 1842 1843 // Unrecognized is the number of unrecognized messages counted, these are 1844 // silently ignored for forward-compatibilty. 1845 Unrecognized *StatCounter 1846 1847 // LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPReceivedPacketStats) 1848 } 1849 1850 // IGMPStats collects IGMP-specific stats. 1851 type IGMPStats struct { 1852 // LINT.IfChange(IGMPStats) 1853 1854 // PacketsSent contains statistics about sent packets. 1855 PacketsSent IGMPSentPacketStats 1856 1857 // PacketsReceived contains statistics about received packets. 1858 PacketsReceived IGMPReceivedPacketStats 1859 1860 // LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPStats) 1861 } 1862 1863 // IPForwardingStats collects stats related to IP forwarding (both v4 and v6). 1864 type IPForwardingStats struct { 1865 // LINT.IfChange(IPForwardingStats) 1866 1867 // Unrouteable is the number of IP packets received which were dropped 1868 // because a route to their destination could not be constructed. 1869 Unrouteable *StatCounter 1870 1871 // ExhaustedTTL is the number of IP packets received which were dropped 1872 // because their TTL was exhausted. 1873 ExhaustedTTL *StatCounter 1874 1875 // InitializingSource is the number of IP packets which were dropped 1876 // because they contained a source address that may only be used on the local 1877 // network as part of initialization work. 1878 InitializingSource *StatCounter 1879 1880 // LinkLocalSource is the number of IP packets which were dropped 1881 // because they contained a link-local source address. 
1882 LinkLocalSource *StatCounter 1883 1884 // LinkLocalDestination is the number of IP packets which were dropped 1885 // because they contained a link-local destination address. 1886 LinkLocalDestination *StatCounter 1887 1888 // PacketTooBig is the number of IP packets which were dropped because they 1889 // were too big for the outgoing MTU. 1890 PacketTooBig *StatCounter 1891 1892 // HostUnreachable is the number of IP packets received which could not be 1893 // successfully forwarded due to an unresolvable next hop. 1894 HostUnreachable *StatCounter 1895 1896 // ExtensionHeaderProblem is the number of IP packets which were dropped 1897 // because of a problem encountered when processing an IPv6 extension 1898 // header. 1899 ExtensionHeaderProblem *StatCounter 1900 1901 // UnexpectedMulticastInputInterface is the number of multicast packets that 1902 // were received on an interface that did not match the corresponding route's 1903 // expected input interface. 1904 UnexpectedMulticastInputInterface *StatCounter 1905 1906 // UnknownOutputEndpoint is the number of packets that could not be forwarded 1907 // because the output endpoint could not be found. 1908 UnknownOutputEndpoint *StatCounter 1909 1910 // NoMulticastPendingQueueBufferSpace is the number of multicast packets that 1911 // were dropped due to insufficient buffer space in the pending packet queue. 1912 NoMulticastPendingQueueBufferSpace *StatCounter 1913 1914 // OutgoingDeviceNoBufferSpace is the number of packets that were dropped due 1915 // to insufficient space in the outgoing device. 1916 OutgoingDeviceNoBufferSpace *StatCounter 1917 1918 // Errors is the number of IP packets received which could not be 1919 // successfully forwarded. 1920 Errors *StatCounter 1921 1922 // LINT.ThenChange(network/internal/ip/stats.go:MultiCounterIPForwardingStats) 1923 } 1924 1925 // IPStats collects IP-specific stats (both v4 and v6). 
1926 type IPStats struct { 1927 // LINT.IfChange(IPStats) 1928 1929 // PacketsReceived is the number of IP packets received from the link layer. 1930 PacketsReceived *StatCounter 1931 1932 // ValidPacketsReceived is the number of valid IP packets that reached the IP 1933 // layer. 1934 ValidPacketsReceived *StatCounter 1935 1936 // DisabledPacketsReceived is the number of IP packets received from the link 1937 // layer when the IP layer is disabled. 1938 DisabledPacketsReceived *StatCounter 1939 1940 // InvalidDestinationAddressesReceived is the number of IP packets received 1941 // with an unknown or invalid destination address. 1942 InvalidDestinationAddressesReceived *StatCounter 1943 1944 // InvalidSourceAddressesReceived is the number of IP packets received with a 1945 // source address that should never have been received on the wire. 1946 InvalidSourceAddressesReceived *StatCounter 1947 1948 // PacketsDelivered is the number of incoming IP packets that are successfully 1949 // delivered to the transport layer. 1950 PacketsDelivered *StatCounter 1951 1952 // PacketsSent is the number of IP packets sent via WritePacket. 1953 PacketsSent *StatCounter 1954 1955 // OutgoingPacketErrors is the number of IP packets which failed to write to a 1956 // link-layer endpoint. 1957 OutgoingPacketErrors *StatCounter 1958 1959 // MalformedPacketsReceived is the number of IP Packets that were dropped due 1960 // to the IP packet header failing validation checks. 1961 MalformedPacketsReceived *StatCounter 1962 1963 // MalformedFragmentsReceived is the number of IP Fragments that were dropped 1964 // due to the fragment failing validation checks. 1965 MalformedFragmentsReceived *StatCounter 1966 1967 // IPTablesPreroutingDropped is the number of IP packets dropped in the 1968 // Prerouting chain. 1969 IPTablesPreroutingDropped *StatCounter 1970 1971 // IPTablesInputDropped is the number of IP packets dropped in the Input 1972 // chain. 
1973 IPTablesInputDropped *StatCounter 1974 1975 // IPTablesForwardDropped is the number of IP packets dropped in the Forward 1976 // chain. 1977 IPTablesForwardDropped *StatCounter 1978 1979 // IPTablesOutputDropped is the number of IP packets dropped in the Output 1980 // chain. 1981 IPTablesOutputDropped *StatCounter 1982 1983 // IPTablesPostroutingDropped is the number of IP packets dropped in the 1984 // Postrouting chain. 1985 IPTablesPostroutingDropped *StatCounter 1986 1987 // TODO(https://gvisor.dev/issues/5529): Move the IPv4-only option stats out 1988 // of IPStats. 1989 // OptionTimestampReceived is the number of Timestamp options seen. 1990 OptionTimestampReceived *StatCounter 1991 1992 // OptionRecordRouteReceived is the number of Record Route options seen. 1993 OptionRecordRouteReceived *StatCounter 1994 1995 // OptionRouterAlertReceived is the number of Router Alert options seen. 1996 OptionRouterAlertReceived *StatCounter 1997 1998 // OptionUnknownReceived is the number of unknown IP options seen. 1999 OptionUnknownReceived *StatCounter 2000 2001 // Forwarding collects stats related to IP forwarding. 2002 Forwarding IPForwardingStats 2003 2004 // LINT.ThenChange(network/internal/ip/stats.go:MultiCounterIPStats) 2005 } 2006 2007 // ARPStats collects ARP-specific stats. 2008 type ARPStats struct { 2009 // LINT.IfChange(ARPStats) 2010 2011 // PacketsReceived is the number of ARP packets received from the link layer. 2012 PacketsReceived *StatCounter 2013 2014 // DisabledPacketsReceived is the number of ARP packets received from the link 2015 // layer when the ARP layer is disabled. 2016 DisabledPacketsReceived *StatCounter 2017 2018 // MalformedPacketsReceived is the number of ARP packets that were dropped due 2019 // to being malformed. 2020 MalformedPacketsReceived *StatCounter 2021 2022 // RequestsReceived is the number of ARP requests received. 
2023 RequestsReceived *StatCounter 2024 2025 // RequestsReceivedUnknownTargetAddress is the number of ARP requests that 2026 // were targeted to an interface different from the one it was received on. 2027 RequestsReceivedUnknownTargetAddress *StatCounter 2028 2029 // OutgoingRequestInterfaceHasNoLocalAddressErrors is the number of failures 2030 // to send an ARP request because the interface has no network address 2031 // assigned to it. 2032 OutgoingRequestInterfaceHasNoLocalAddressErrors *StatCounter 2033 2034 // OutgoingRequestBadLocalAddressErrors is the number of failures to send an 2035 // ARP request with a bad local address. 2036 OutgoingRequestBadLocalAddressErrors *StatCounter 2037 2038 // OutgoingRequestsDropped is the number of ARP requests which failed to write 2039 // to a link-layer endpoint. 2040 OutgoingRequestsDropped *StatCounter 2041 2042 // OutgoingRequestSent is the number of ARP requests successfully written to a 2043 // link-layer endpoint. 2044 OutgoingRequestsSent *StatCounter 2045 2046 // RepliesReceived is the number of ARP replies received. 2047 RepliesReceived *StatCounter 2048 2049 // OutgoingRepliesDropped is the number of ARP replies which failed to write 2050 // to a link-layer endpoint. 2051 OutgoingRepliesDropped *StatCounter 2052 2053 // OutgoingRepliesSent is the number of ARP replies successfully written to a 2054 // link-layer endpoint. 2055 OutgoingRepliesSent *StatCounter 2056 2057 // LINT.ThenChange(network/arp/stats.go:multiCounterARPStats) 2058 } 2059 2060 // TCPStats collects TCP-specific stats. 2061 type TCPStats struct { 2062 // ActiveConnectionOpenings is the number of connections opened 2063 // successfully via Connect. 2064 ActiveConnectionOpenings *StatCounter 2065 2066 // PassiveConnectionOpenings is the number of connections opened 2067 // successfully via Listen. 
2068 PassiveConnectionOpenings *StatCounter 2069 2070 // CurrentEstablished is the number of TCP connections for which the 2071 // current state is ESTABLISHED. 2072 CurrentEstablished *StatCounter 2073 2074 // CurrentConnected is the number of TCP connections that 2075 // are in connected state. 2076 CurrentConnected *StatCounter 2077 2078 // EstablishedResets is the number of times TCP connections have made 2079 // a direct transition to the CLOSED state from either the 2080 // ESTABLISHED state or the CLOSE-WAIT state. 2081 EstablishedResets *StatCounter 2082 2083 // EstablishedClosed is the number of times established TCP connections 2084 // made a transition to CLOSED state. 2085 EstablishedClosed *StatCounter 2086 2087 // EstablishedTimedout is the number of times an established connection 2088 // was reset because of keep-alive time out. 2089 EstablishedTimedout *StatCounter 2090 2091 // ListenOverflowSynDrop is the number of times the listen queue overflowed 2092 // and a SYN was dropped. 2093 ListenOverflowSynDrop *StatCounter 2094 2095 // ListenOverflowAckDrop is the number of times the final ACK 2096 // in the handshake was dropped due to overflow. 2097 ListenOverflowAckDrop *StatCounter 2098 2099 // ListenOverflowCookieSent is the number of times a SYN cookie was sent. 2100 ListenOverflowSynCookieSent *StatCounter 2101 2102 // ListenOverflowSynCookieRcvd is the number of times a valid SYN 2103 // cookie was received. 2104 ListenOverflowSynCookieRcvd *StatCounter 2105 2106 // ListenOverflowInvalidSynCookieRcvd is the number of times an invalid SYN cookie 2107 // was received. 2108 ListenOverflowInvalidSynCookieRcvd *StatCounter 2109 2110 // FailedConnectionAttempts is the number of calls to Connect or Listen 2111 // (active and passive openings, respectively) that end in an error. 2112 FailedConnectionAttempts *StatCounter 2113 2114 // ValidSegmentsReceived is the number of TCP segments received that 2115 // the transport layer successfully parsed. 
2116 ValidSegmentsReceived *StatCounter 2117 2118 // InvalidSegmentsReceived is the number of TCP segments received that 2119 // the transport layer could not parse. 2120 InvalidSegmentsReceived *StatCounter 2121 2122 // SegmentsSent is the number of TCP segments sent. 2123 SegmentsSent *StatCounter 2124 2125 // SegmentSendErrors is the number of TCP segments failed to be sent. 2126 SegmentSendErrors *StatCounter 2127 2128 // ResetsSent is the number of TCP resets sent. 2129 ResetsSent *StatCounter 2130 2131 // ResetsReceived is the number of TCP resets received. 2132 ResetsReceived *StatCounter 2133 2134 // Retransmits is the number of TCP segments retransmitted. 2135 Retransmits *StatCounter 2136 2137 // FastRecovery is the number of times Fast Recovery was used to 2138 // recover from packet loss. 2139 FastRecovery *StatCounter 2140 2141 // SACKRecovery is the number of times SACK Recovery was used to 2142 // recover from packet loss. 2143 SACKRecovery *StatCounter 2144 2145 // TLPRecovery is the number of times recovery was accomplished by the tail 2146 // loss probe. 2147 TLPRecovery *StatCounter 2148 2149 // SlowStartRetransmits is the number of segments retransmitted in slow 2150 // start. 2151 SlowStartRetransmits *StatCounter 2152 2153 // FastRetransmit is the number of segments retransmitted in fast 2154 // recovery. 2155 FastRetransmit *StatCounter 2156 2157 // Timeouts is the number of times the RTO expired. 2158 Timeouts *StatCounter 2159 2160 // ChecksumErrors is the number of segments dropped due to bad checksums. 2161 ChecksumErrors *StatCounter 2162 2163 // FailedPortReservations is the number of times TCP failed to reserve 2164 // a port. 2165 FailedPortReservations *StatCounter 2166 2167 // SegmentsAckedWithDSACK is the number of segments acknowledged with 2168 // DSACK. 2169 SegmentsAckedWithDSACK *StatCounter 2170 2171 // SpuriousRecovery is the number of times the connection entered loss 2172 // recovery spuriously. 
2173 SpuriousRecovery *StatCounter 2174 2175 // SpuriousRTORecovery is the number of spurious RTOs. 2176 SpuriousRTORecovery *StatCounter 2177 2178 // ForwardMaxInFlightDrop is the number of connection requests that are 2179 // dropped due to exceeding the maximum number of in-flight connection 2180 // requests. 2181 ForwardMaxInFlightDrop *StatCounter 2182 } 2183 2184 // UDPStats collects UDP-specific stats. 2185 type UDPStats struct { 2186 // PacketsReceived is the number of UDP datagrams received via 2187 // HandlePacket. 2188 PacketsReceived *StatCounter 2189 2190 // UnknownPortErrors is the number of incoming UDP datagrams dropped 2191 // because they did not have a known destination port. 2192 UnknownPortErrors *StatCounter 2193 2194 // ReceiveBufferErrors is the number of incoming UDP datagrams dropped 2195 // due to the receiving buffer being in an invalid state. 2196 ReceiveBufferErrors *StatCounter 2197 2198 // MalformedPacketsReceived is the number of incoming UDP datagrams 2199 // dropped due to the UDP header being in a malformed state. 2200 MalformedPacketsReceived *StatCounter 2201 2202 // PacketsSent is the number of UDP datagrams sent via sendUDP. 2203 PacketsSent *StatCounter 2204 2205 // PacketSendErrors is the number of datagrams failed to be sent. 2206 PacketSendErrors *StatCounter 2207 2208 // ChecksumErrors is the number of datagrams dropped due to bad checksums. 2209 ChecksumErrors *StatCounter 2210 } 2211 2212 // NICNeighborStats holds metrics for the neighbor table. 2213 type NICNeighborStats struct { 2214 // LINT.IfChange(NICNeighborStats) 2215 2216 // UnreachableEntryLookups counts the number of lookups performed on an 2217 // entry in Unreachable state. 2218 UnreachableEntryLookups *StatCounter 2219 2220 // DroppedConfirmationForNoninitiatedNeighbor counts the number of neighbor 2221 // responses that were dropped because they didn't match an entry in the 2222 // cache. 
2223 DroppedConfirmationForNoninitiatedNeighbor *StatCounter 2224 2225 // DroppedInvalidLinkAddressConfirmations counts the number of neighbor 2226 // responses that were ignored because they had an invalid source link-layer 2227 // address. 2228 DroppedInvalidLinkAddressConfirmations *StatCounter 2229 2230 // LINT.ThenChange(stack/nic_stats.go:multiCounterNICNeighborStats) 2231 } 2232 2233 // NICPacketStats holds basic packet statistics. 2234 type NICPacketStats struct { 2235 // LINT.IfChange(NICPacketStats) 2236 2237 // Packets is the number of packets counted. 2238 Packets *StatCounter 2239 2240 // Bytes is the number of bytes counted. 2241 Bytes *StatCounter 2242 2243 // LINT.ThenChange(stack/nic_stats.go:multiCounterNICPacketStats) 2244 } 2245 2246 // IntegralStatCounterMap holds a map associating integral keys with 2247 // StatCounters. 2248 type IntegralStatCounterMap struct { 2249 mu sync.RWMutex 2250 // +checklocks:mu 2251 counterMap map[uint64]*StatCounter 2252 } 2253 2254 // Keys returns all keys present in the map. 2255 func (m *IntegralStatCounterMap) Keys() []uint64 { 2256 m.mu.RLock() 2257 defer m.mu.RUnlock() 2258 var keys []uint64 2259 for k := range m.counterMap { 2260 keys = append(keys, k) 2261 } 2262 return keys 2263 } 2264 2265 // Get returns the counter mapped by the provided key. 2266 func (m *IntegralStatCounterMap) Get(key uint64) (*StatCounter, bool) { 2267 m.mu.RLock() 2268 defer m.mu.RUnlock() 2269 counter, ok := m.counterMap[key] 2270 return counter, ok 2271 } 2272 2273 // Init initializes the map. 2274 func (m *IntegralStatCounterMap) Init() { 2275 m.mu.Lock() 2276 defer m.mu.Unlock() 2277 m.counterMap = make(map[uint64]*StatCounter) 2278 } 2279 2280 // Increment increments the counter associated with the provided key. 
2281 func (m *IntegralStatCounterMap) Increment(key uint64) { 2282 m.mu.RLock() 2283 counter, ok := m.counterMap[key] 2284 m.mu.RUnlock() 2285 2286 if !ok { 2287 m.mu.Lock() 2288 counter, ok = m.counterMap[key] 2289 if !ok { 2290 counter = new(StatCounter) 2291 m.counterMap[key] = counter 2292 } 2293 m.mu.Unlock() 2294 } 2295 counter.Increment() 2296 } 2297 2298 // A MultiIntegralStatCounterMap keeps track of two integral counter maps at 2299 // once. 2300 type MultiIntegralStatCounterMap struct { 2301 a *IntegralStatCounterMap 2302 b *IntegralStatCounterMap 2303 } 2304 2305 // Init sets the internal integral counter maps to point to a and b. 2306 func (m *MultiIntegralStatCounterMap) Init(a, b *IntegralStatCounterMap) { 2307 m.a = a 2308 m.b = b 2309 } 2310 2311 // Increment increments the counter in each map corresponding to the 2312 // provided key. 2313 func (m *MultiIntegralStatCounterMap) Increment(key uint64) { 2314 m.a.Increment(key) 2315 m.b.Increment(key) 2316 } 2317 2318 // NICStats holds NIC statistics. 2319 type NICStats struct { 2320 // LINT.IfChange(NICStats) 2321 2322 // UnknownL3ProtocolRcvdPacketCounts records the number of packets received 2323 // for each unknown or unsupported network protocol number. 2324 UnknownL3ProtocolRcvdPacketCounts *IntegralStatCounterMap 2325 2326 // UnknownL4ProtocolRcvdPacketCounts records the number of packets received 2327 // for each unknown or unsupported transport protocol number. 2328 UnknownL4ProtocolRcvdPacketCounts *IntegralStatCounterMap 2329 2330 // MalformedL4RcvdPackets is the number of packets received by a NIC that 2331 // could not be delivered to a transport endpoint because the L4 header could 2332 // not be parsed. 2333 MalformedL4RcvdPackets *StatCounter 2334 2335 // Tx contains statistics about transmitted packets. 2336 Tx NICPacketStats 2337 2338 // TxPacketsDroppedNoBufferSpace is the number of packets dropepd due to the 2339 // NIC not having enough buffer space to send the packet. 
2340 // 2341 // Packets may be dropped with a no buffer space error when the device TX 2342 // queue is full. 2343 TxPacketsDroppedNoBufferSpace *StatCounter 2344 2345 // Rx contains statistics about received packets. 2346 Rx NICPacketStats 2347 2348 // DisabledRx contains statistics about received packets on disabled NICs. 2349 DisabledRx NICPacketStats 2350 2351 // Neighbor contains statistics about neighbor entries. 2352 Neighbor NICNeighborStats 2353 2354 // LINT.ThenChange(stack/nic_stats.go:multiCounterNICStats) 2355 } 2356 2357 // FillIn returns a copy of s with nil fields initialized to new StatCounters. 2358 func (s NICStats) FillIn() NICStats { 2359 InitStatCounters(reflect.ValueOf(&s).Elem()) 2360 return s 2361 } 2362 2363 // Stats holds statistics about the networking stack. 2364 type Stats struct { 2365 // TODO(https://gvisor.dev/issues/5986): Make the DroppedPackets stat less 2366 // ambiguous. 2367 2368 // DroppedPackets is the number of packets dropped at the transport layer. 2369 DroppedPackets *StatCounter 2370 2371 // NICs is an aggregation of every NIC's statistics. These should not be 2372 // incremented using this field, but using the relevant NIC multicounters. 2373 NICs NICStats 2374 2375 // ICMP is an aggregation of every NetworkEndpoint's ICMP statistics (both v4 2376 // and v6). These should not be incremented using this field, but using the 2377 // relevant NetworkEndpoint ICMP multicounters. 2378 ICMP ICMPStats 2379 2380 // IGMP is an aggregation of every NetworkEndpoint's IGMP statistics. These 2381 // should not be incremented using this field, but using the relevant 2382 // NetworkEndpoint IGMP multicounters. 2383 IGMP IGMPStats 2384 2385 // IP is an aggregation of every NetworkEndpoint's IP statistics. These should 2386 // not be incremented using this field, but using the relevant NetworkEndpoint 2387 // IP multicounters. 2388 IP IPStats 2389 2390 // ARP is an aggregation of every NetworkEndpoint's ARP statistics. 
These 2391 // should not be incremented using this field, but using the relevant 2392 // NetworkEndpoint ARP multicounters. 2393 ARP ARPStats 2394 2395 // TCP holds TCP-specific stats. 2396 TCP TCPStats 2397 2398 // UDP holds UDP-specific stats. 2399 UDP UDPStats 2400 } 2401 2402 // ReceiveErrors collects packet receive errors within transport endpoint. 2403 // 2404 // +stateify savable 2405 type ReceiveErrors struct { 2406 // ReceiveBufferOverflow is the number of received packets dropped 2407 // due to the receive buffer being full. 2408 ReceiveBufferOverflow StatCounter 2409 2410 // MalformedPacketsReceived is the number of incoming packets 2411 // dropped due to the packet header being in a malformed state. 2412 MalformedPacketsReceived StatCounter 2413 2414 // ClosedReceiver is the number of received packets dropped because 2415 // of receiving endpoint state being closed. 2416 ClosedReceiver StatCounter 2417 2418 // ChecksumErrors is the number of packets dropped due to bad checksums. 2419 ChecksumErrors StatCounter 2420 } 2421 2422 // SendErrors collects packet send errors within the transport layer for an 2423 // endpoint. 2424 // 2425 // +stateify savable 2426 type SendErrors struct { 2427 // SendToNetworkFailed is the number of packets failed to be written to 2428 // the network endpoint. 2429 SendToNetworkFailed StatCounter 2430 2431 // NoRoute is the number of times we failed to resolve IP route. 2432 NoRoute StatCounter 2433 } 2434 2435 // ReadErrors collects segment read errors from an endpoint read call. 2436 // 2437 // +stateify savable 2438 type ReadErrors struct { 2439 // ReadClosed is the number of received packet drops because the endpoint 2440 // was shutdown for read. 2441 ReadClosed StatCounter 2442 2443 // InvalidEndpointState is the number of times we found the endpoint state 2444 // to be unexpected. 
2445 InvalidEndpointState StatCounter 2446 2447 // NotConnected is the number of times we tried to read but found that the 2448 // endpoint was not connected. 2449 NotConnected StatCounter 2450 } 2451 2452 // WriteErrors collects packet write errors from an endpoint write call. 2453 // 2454 // +stateify savable 2455 type WriteErrors struct { 2456 // WriteClosed is the number of packet drops because the endpoint 2457 // was shutdown for write. 2458 WriteClosed StatCounter 2459 2460 // InvalidEndpointState is the number of times we found the endpoint state 2461 // to be unexpected. 2462 InvalidEndpointState StatCounter 2463 2464 // InvalidArgs is the number of times invalid input arguments were 2465 // provided for endpoint Write call. 2466 InvalidArgs StatCounter 2467 } 2468 2469 // TransportEndpointStats collects statistics about the endpoint. 2470 // 2471 // +stateify savable 2472 type TransportEndpointStats struct { 2473 // PacketsReceived is the number of successful packet receives. 2474 PacketsReceived StatCounter 2475 2476 // PacketsSent is the number of successful packet sends. 2477 PacketsSent StatCounter 2478 2479 // ReceiveErrors collects packet receive errors within transport layer. 2480 ReceiveErrors ReceiveErrors 2481 2482 // ReadErrors collects packet read errors from an endpoint read call. 2483 ReadErrors ReadErrors 2484 2485 // SendErrors collects packet send errors within the transport layer. 2486 SendErrors SendErrors 2487 2488 // WriteErrors collects packet write errors from an endpoint write call. 2489 WriteErrors WriteErrors 2490 } 2491 2492 // IsEndpointStats is an empty method to implement the tcpip.EndpointStats 2493 // marker interface. 2494 func (*TransportEndpointStats) IsEndpointStats() {} 2495 2496 // InitStatCounters initializes v's fields with nil StatCounter fields to new 2497 // StatCounters. 
func InitStatCounters(v reflect.Value) {
	// Walk every field of the struct value. The address of each field is
	// taken below, so v needs to be addressable; the FillIn callers in this
	// file pass reflect.ValueOf(&s).Elem() for that reason.
	for i := 0; i < v.NumField(); i++ {
		v := v.Field(i)
		if s, ok := v.Addr().Interface().(**StatCounter); ok {
			// A *StatCounter field: allocate it only if it is still nil so
			// counters the caller already set are kept.
			if *s == nil {
				*s = new(StatCounter)
			}
		} else if s, ok := v.Addr().Interface().(**IntegralStatCounterMap); ok {
			// A *IntegralStatCounterMap field: allocate and Init it if nil.
			if *s == nil {
				*s = new(IntegralStatCounterMap)
				(*s).Init()
			}
		} else {
			// Any other field is recursed into, so it is presumably expected
			// to be a nested stats struct (NumField is called on it).
			InitStatCounters(v)
		}
	}
}

// FillIn returns a copy of s with nil fields initialized to new StatCounters.
//
// s is received by value, so the caller's Stats is left untouched.
func (s Stats) FillIn() Stats {
	InitStatCounters(reflect.ValueOf(&s).Elem())
	return s
}

// Clone clones a copy of the TransportEndpointStats into dst by atomically
// reading each field.
func (src *TransportEndpointStats) Clone(dst *TransportEndpointStats) {
	clone(reflect.ValueOf(dst).Elem(), reflect.ValueOf(src).Elem())
}

// clone walks dst and src field by field in lockstep. For each StatCounter
// field it reads the source with Value() and applies it to the matching
// destination counter with IncrementBy; every other field is recursed into.
//
// NOTE(review): the destination is updated with IncrementBy rather than being
// assigned, so this presumably yields an exact copy only when dst's counters
// start at zero — confirm against StatCounter.
func clone(dst reflect.Value, src reflect.Value) {
	for i := 0; i < dst.NumField(); i++ {
		d := dst.Field(i)
		s := src.Field(i)
		if c, ok := s.Addr().Interface().(*StatCounter); ok {
			d.Addr().Interface().(*StatCounter).IncrementBy(c.Value())
		} else {
			clone(d, s)
		}
	}
}

// String implements the fmt.Stringer interface.
//
// A 4-byte address is rendered in dotted-decimal form. A 16-byte address is
// rendered as colon-separated lowercase hexadecimal 16-bit groups, with the
// longest run of two or more all-zero groups replaced by "::". Any other
// length is rendered as a plain hexadecimal dump of the address bytes.
func (a Address) String() string {
	switch l := a.Len(); l {
	case 4:
		return fmt.Sprintf("%d.%d.%d.%d", int(a.addr[0]), int(a.addr[1]), int(a.addr[2]), int(a.addr[3]))
	case 16:
		// Find the longest subsequence of hexadecimal zeros.
		// start and end are byte offsets of that run ([start, end)); both stay
		// at -1 when no run qualifies.
		start, end := -1, -1
		for i := 0; i < a.Len(); i += 2 {
			j := i
			for j < a.Len() && a.addr[j] == 0 && a.addr[j+1] == 0 {
				j += 2
			}
			// j > i+2 requires the run to span more than one group, and the
			// strict > keeps the earliest run when several have equal length.
			if j > i+2 && j-i > end-start {
				start, end = i, j
			}
		}

		var b strings.Builder
		for i := 0; i < a.Len(); i += 2 {
			if i == start {
				// Emit the compressed run and skip past it. If the run
				// extends to the end of the address there is nothing left to
				// print.
				b.WriteString("::")
				i = end
				if end >= a.Len() {
					break
				}
			} else if i > 0 {
				b.WriteByte(':')
			}
			// Assemble the 16-bit group from its two bytes, high byte first.
			v := uint16(a.addr[i+0])<<8 | uint16(a.addr[i+1])
			if v == 0 {
				b.WriteByte('0')
			} else {
				const digits = "0123456789abcdef"
				// i runs 3, 2, 1, 0 and the loop stops when the unsigned
				// counter wraps past zero. A nibble is written only once the
				// value shifted down to it is non-zero, which suppresses
				// leading zeros.
				for i := uint(3); i < 4; i-- {
					if v := v >> (i * 4); v != 0 {
						b.WriteByte(digits[v&0xf])
					}
				}
			}
		}
		return b.String()
	default:
		return fmt.Sprintf("%x", a.addr[:l])
	}
}

// To4 converts the IPv4 address to a 4-byte representation.
// If the address is not an IPv4 address, To4 returns the empty Address.
//
// A 4-byte address is returned unchanged. A 16-byte address is converted only
// when its first 10 bytes are zero and bytes 10 and 11 are both 0xff, in which
// case the last four bytes are returned as the result.
func (a Address) To4() Address {
	const (
		ipv4len = 4
		ipv6len = 16
	)
	if a.Len() == ipv4len {
		return a
	}
	if a.Len() == ipv6len &&
		isZeros(a.addr[:10]) &&
		a.addr[10] == 0xff &&
		a.addr[11] == 0xff {
		return AddrFrom4Slice(a.addr[12:16])
	}
	return Address{}
}

// isZeros reports whether addr is all zeros.
// An empty slice is reported as all zeros.
func isZeros(addr []byte) bool {
	for _, b := range addr {
		if b != 0 {
			return false
		}
	}
	return true
}

// LinkAddress is a byte slice cast as a string that represents a link address.
// It is typically a 6-byte MAC address.
type LinkAddress string

// String implements the fmt.Stringer interface.
2621 func (a LinkAddress) String() string { 2622 switch len(a) { 2623 case 6: 2624 return fmt.Sprintf("%02x:%02x:%02x:%02x:%02x:%02x", a[0], a[1], a[2], a[3], a[4], a[5]) 2625 default: 2626 return fmt.Sprintf("%x", []byte(a)) 2627 } 2628 } 2629 2630 // ParseMACAddress parses an IEEE 802 address. 2631 // 2632 // It must be in the format aa:bb:cc:dd:ee:ff or aa-bb-cc-dd-ee-ff. 2633 func ParseMACAddress(s string) (LinkAddress, error) { 2634 parts := strings.FieldsFunc(s, func(c rune) bool { 2635 return c == ':' || c == '-' 2636 }) 2637 if len(parts) != 6 { 2638 return "", fmt.Errorf("inconsistent parts: %s", s) 2639 } 2640 addr := make([]byte, 0, len(parts)) 2641 for _, part := range parts { 2642 u, err := strconv.ParseUint(part, 16, 8) 2643 if err != nil { 2644 return "", fmt.Errorf("invalid hex digits: %s", s) 2645 } 2646 addr = append(addr, byte(u)) 2647 } 2648 return LinkAddress(addr), nil 2649 } 2650 2651 // AddressWithPrefix is an address with its subnet prefix length. 2652 // 2653 // +stateify savable 2654 type AddressWithPrefix struct { 2655 // Address is a network address. 2656 Address Address 2657 2658 // PrefixLen is the subnet prefix length. 2659 PrefixLen int 2660 } 2661 2662 // String implements the fmt.Stringer interface. 2663 func (a AddressWithPrefix) String() string { 2664 return fmt.Sprintf("%s/%d", a.Address, a.PrefixLen) 2665 } 2666 2667 // Subnet converts the address and prefix into a Subnet value and returns it. 
2668 func (a AddressWithPrefix) Subnet() Subnet { 2669 addrLen := a.Address.length 2670 if a.PrefixLen <= 0 { 2671 return Subnet{ 2672 address: AddrFromSlice(bytes.Repeat([]byte{0}, addrLen)), 2673 mask: MaskFromBytes(bytes.Repeat([]byte{0}, addrLen)), 2674 } 2675 } 2676 if a.PrefixLen >= addrLen*8 { 2677 return Subnet{ 2678 address: a.Address, 2679 mask: MaskFromBytes(bytes.Repeat([]byte{0xff}, addrLen)), 2680 } 2681 } 2682 2683 sa := Address{length: addrLen} 2684 sm := AddressMask{length: addrLen} 2685 n := uint(a.PrefixLen) 2686 for i := 0; i < addrLen; i++ { 2687 if n >= 8 { 2688 sa.addr[i] = a.Address.addr[i] 2689 sm.mask[i] = 0xff 2690 n -= 8 2691 continue 2692 } 2693 sm.mask[i] = ^byte(0xff >> n) 2694 sa.addr[i] = a.Address.addr[i] & sm.mask[i] 2695 n = 0 2696 } 2697 2698 // For extra caution, call NewSubnet rather than directly creating the Subnet 2699 // value. If that fails it indicates a serious bug in this code, so panic is 2700 // in order. 2701 s, err := NewSubnet(sa, sm) 2702 if err != nil { 2703 panic("invalid subnet: " + err.Error()) 2704 } 2705 return s 2706 } 2707 2708 // ProtocolAddress is an address and the network protocol it is associated 2709 // with. 2710 type ProtocolAddress struct { 2711 // Protocol is the protocol of the address. 2712 Protocol NetworkProtocolNumber 2713 2714 // AddressWithPrefix is a network address with its subnet prefix length. 2715 AddressWithPrefix AddressWithPrefix 2716 } 2717 2718 var ( 2719 // danglingEndpointsMu protects access to danglingEndpoints. 2720 danglingEndpointsMu sync.Mutex 2721 2722 // danglingEndpoints tracks all dangling endpoints no longer owned by the app. 2723 danglingEndpoints = make(map[Endpoint]struct{}) 2724 ) 2725 2726 // GetDanglingEndpoints returns all dangling endpoints. 
2727 func GetDanglingEndpoints() []Endpoint { 2728 danglingEndpointsMu.Lock() 2729 es := make([]Endpoint, 0, len(danglingEndpoints)) 2730 for e := range danglingEndpoints { 2731 es = append(es, e) 2732 } 2733 danglingEndpointsMu.Unlock() 2734 return es 2735 } 2736 2737 // ReleaseDanglingEndpoints clears out all all reference counted objects held by 2738 // dangling endpoints. 2739 func ReleaseDanglingEndpoints() { 2740 // Get the dangling endpoints first to avoid locking around Release(), which 2741 // can cause a lock inversion with endpoint.mu and danglingEndpointsMu. 2742 // Calling Release on a dangling endpoint that has been deleted is a noop. 2743 eps := GetDanglingEndpoints() 2744 for _, ep := range eps { 2745 ep.Abort() 2746 } 2747 } 2748 2749 // AddDanglingEndpoint adds a dangling endpoint. 2750 func AddDanglingEndpoint(e Endpoint) { 2751 danglingEndpointsMu.Lock() 2752 danglingEndpoints[e] = struct{}{} 2753 danglingEndpointsMu.Unlock() 2754 } 2755 2756 // DeleteDanglingEndpoint removes a dangling endpoint. 2757 func DeleteDanglingEndpoint(e Endpoint) { 2758 danglingEndpointsMu.Lock() 2759 delete(danglingEndpoints, e) 2760 danglingEndpointsMu.Unlock() 2761 } 2762 2763 // AsyncLoading is the global barrier for asynchronous endpoint loading 2764 // activities. 2765 var AsyncLoading sync.WaitGroup