github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/tcpip/tcpip.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package tcpip provides the interfaces and related types that users of the 16 // tcpip stack will use in order to create endpoints used to send and receive 17 // data over the network stack. 18 // 19 // The starting point is the creation and configuration of a stack. A stack can 20 // be created by calling the New() function of the tcpip/stack/stack package; 21 // configuring a stack involves creating NICs (via calls to Stack.CreateNIC()), 22 // adding network addresses (via calls to Stack.AddProtocolAddress()), and 23 // setting a route table (via a call to Stack.SetRouteTable()). 24 // 25 // Once a stack is configured, endpoints can be created by calling 26 // Stack.NewEndpoint(). Such endpoints can be used to send/receive data, connect 27 // to peers, listen for connections, accept connections, etc., depending on the 28 // transport protocol selected. 29 package tcpip 30 31 import ( 32 "bytes" 33 "errors" 34 "fmt" 35 "io" 36 "math" 37 "math/bits" 38 "reflect" 39 "strconv" 40 "strings" 41 "time" 42 "unsafe" 43 44 "github.com/MerlinKodo/gvisor/pkg/atomicbitops" 45 "github.com/MerlinKodo/gvisor/pkg/sync" 46 "github.com/MerlinKodo/gvisor/pkg/waiter" 47 ) 48 49 // Using the header package here would cause an import cycle. 
const (
	// Address sizes are expressed in bytes; AddrFromSlice compares them
	// against len(addr).
	ipv4AddressSize    = 4
	ipv4ProtocolNumber = 0x0800
	ipv6AddressSize    = 16
	ipv6ProtocolNumber = 0x86dd
)

// Errors related to Subnet.
var (
	errSubnetLengthMismatch = errors.New("subnet length of address and mask differ")
	errSubnetAddressMasked  = errors.New("subnet address has bits set outside the mask")
)

// ErrSaveRejection indicates a failed save due to unsupported networking state.
// This type of error is only used for save logic.
type ErrSaveRejection struct {
	// Err is the underlying reason the save was rejected. Error dereferences
	// it unconditionally, so it must not be nil.
	Err error
}

// Error returns a sensible description of the save rejection error.
func (e *ErrSaveRejection) Error() string {
	return "save rejected due to unsupported networking state: " + e.Err.Error()
}

// MonotonicTime is a monotonic clock reading.
//
// +stateify savable
type MonotonicTime struct {
	nanoseconds int64
}

// String implements Stringer. The reading is rendered as its nanosecond count
// in base 10.
func (mt MonotonicTime) String() string {
	return strconv.FormatInt(mt.nanoseconds, 10)
}

// MonotonicTimeInfinite returns the monotonic timestamp as far away in the
// future as possible.
func MonotonicTimeInfinite() MonotonicTime {
	return MonotonicTime{nanoseconds: math.MaxInt64}
}

// Before reports whether the monotonic clock reading mt is before u.
func (mt MonotonicTime) Before(u MonotonicTime) bool {
	return mt.nanoseconds < u.nanoseconds
}

// After reports whether the monotonic clock reading mt is after u.
func (mt MonotonicTime) After(u MonotonicTime) bool {
	return mt.nanoseconds > u.nanoseconds
}

// Add returns the monotonic clock reading mt+d.
func (mt MonotonicTime) Add(d time.Duration) MonotonicTime {
	// The sum is computed through time.Time rather than with a plain int64
	// addition: Time.Sub clamps an out-of-range result to the largest (or
	// smallest) Duration instead of wrapping around.
	return MonotonicTime{
		nanoseconds: time.Unix(0, mt.nanoseconds).Add(d).Sub(time.Unix(0, 0)).Nanoseconds(),
	}
}

// Sub returns the duration mt-u. If the result exceeds the maximum (or minimum)
// value that can be stored in a Duration, the maximum (or minimum) duration
// will be returned. To compute t-d for a duration d, use t.Add(-d).
func (mt MonotonicTime) Sub(u MonotonicTime) time.Duration {
	return time.Unix(0, mt.nanoseconds).Sub(time.Unix(0, u.nanoseconds))
}

// A Clock provides the current time and schedules work for execution.
//
// Times returned by a Clock should always be used for application-visible
// time. Only monotonic times should be used for netstack internal timekeeping.
type Clock interface {
	// Now returns the current local time.
	Now() time.Time

	// NowMonotonic returns the current monotonic clock reading.
	NowMonotonic() MonotonicTime

	// AfterFunc waits for the duration to elapse and then calls f in its own
	// goroutine. It returns a Timer that can be used to cancel the call using
	// its Stop method.
	AfterFunc(d time.Duration, f func()) Timer
}

// Timer represents a single event. A Timer must be created with
// Clock.AfterFunc.
type Timer interface {
	// Stop prevents the Timer from firing. It returns true if the call stops the
	// timer, false if the timer has already expired or been stopped.
	//
	// If Stop returns false, then the timer has already expired and the function
	// f of Clock.AfterFunc(d, f) has been started in its own goroutine; Stop
	// does not wait for f to complete before returning. If the caller needs to
	// know whether f is completed, it must coordinate with f explicitly.
	Stop() bool

	// Reset changes the timer to expire after duration d.
	//
	// Reset should be invoked only on stopped or expired timers. If the timer is
	// known to have expired, Reset can be used directly. Otherwise, the caller
	// must coordinate with the function f of Clock.AfterFunc(d, f).
	Reset(d time.Duration)
}

// Address is the address of a network node. The bytes are stored inline in a
// fixed 16-byte array; length records how many leading bytes are in use (the
// constructors in this file produce 4 or 16, and the zero value has length 0).
//
// +stateify savable
type Address struct {
	addr   [16]byte
	length int
}

// AddrFrom4 converts addr to an Address.
func AddrFrom4(addr [4]byte) Address {
	ret := Address{
		length: 4,
	}
	// It's guaranteed that copy will return 4.
	copy(ret.addr[:], addr[:])
	return ret
}

// AddrFrom4Slice converts addr to an Address. It panics if len(addr) != 4.
func AddrFrom4Slice(addr []byte) Address {
	if len(addr) != 4 {
		panic(fmt.Sprintf("bad address length for address %v", addr))
	}
	ret := Address{
		length: 4,
	}
	// It's guaranteed that copy will return 4.
	copy(ret.addr[:], addr)
	return ret
}

// AddrFrom16 converts addr to an Address.
func AddrFrom16(addr [16]byte) Address {
	ret := Address{
		length: 16,
	}
	// It's guaranteed that copy will return 16.
	copy(ret.addr[:], addr[:])
	return ret
}

// AddrFrom16Slice converts addr to an Address. It panics if len(addr) != 16.
func AddrFrom16Slice(addr []byte) Address {
	if len(addr) != 16 {
		panic(fmt.Sprintf("bad address length for address %v", addr))
	}
	ret := Address{
		length: 16,
	}
	// It's guaranteed that copy will return 16.
	copy(ret.addr[:], addr)
	return ret
}

// AddrFromSlice converts addr to an Address. It returns the Address zero value
// if len(addr) != 4 or 16.
func AddrFromSlice(addr []byte) Address {
	switch len(addr) {
	case ipv4AddressSize:
		return AddrFrom4Slice(addr)
	case ipv6AddressSize:
		return AddrFrom16Slice(addr)
	}
	return Address{}
}

// As4 returns a as a 4 byte array. It panics if the address length is not 4.
221 func (a Address) As4() [4]byte { 222 if a.Len() != 4 { 223 panic(fmt.Sprintf("bad address length for address %v", a.addr)) 224 } 225 return *(*[4]byte)((unsafe.Pointer)(&a.addr[0])) 226 } 227 228 // As16 returns a as a 16 byte array. It panics if the address length is not 16. 229 func (a Address) As16() [16]byte { 230 if a.Len() != 16 { 231 panic(fmt.Sprintf("bad address length for address %v", a.addr)) 232 } 233 return a.addr 234 } 235 236 // AsSlice returns a as a byte slice. Callers should be careful as it can 237 // return a window into existing memory. 238 // 239 // +checkescape 240 func (a *Address) AsSlice() []byte { 241 return a.addr[:a.length] 242 } 243 244 // BitLen returns the length in bits of a. 245 func (a Address) BitLen() int { 246 return a.Len() * 8 247 } 248 249 // Len returns the length in bytes of a. 250 func (a Address) Len() int { 251 return a.length 252 } 253 254 // WithPrefix returns the address with a prefix that represents a point subnet. 255 func (a Address) WithPrefix() AddressWithPrefix { 256 return AddressWithPrefix{ 257 Address: a, 258 PrefixLen: a.BitLen(), 259 } 260 } 261 262 // Unspecified returns true if the address is unspecified. 263 func (a Address) Unspecified() bool { 264 for _, b := range a.addr { 265 if b != 0 { 266 return false 267 } 268 } 269 return true 270 } 271 272 // Equal returns whether a and other are equal. It exists for use by the cmp 273 // library. 274 func (a Address) Equal(other Address) bool { 275 return a == other 276 } 277 278 // MatchingPrefix returns the matching prefix length in bits. 279 // 280 // Panics if b and a have different lengths. 
281 func (a Address) MatchingPrefix(b Address) uint8 { 282 const bitsInAByte = 8 283 284 if a.Len() != b.Len() { 285 panic(fmt.Sprintf("addresses %s and %s do not have the same length", a, b)) 286 } 287 288 var prefix uint8 289 for i := 0; i < a.length; i++ { 290 aByte := a.addr[i] 291 bByte := b.addr[i] 292 293 if aByte == bByte { 294 prefix += bitsInAByte 295 continue 296 } 297 298 // Count the remaining matching bits in the byte from MSbit to LSBbit. 299 mask := uint8(1) << (bitsInAByte - 1) 300 for { 301 if aByte&mask == bByte&mask { 302 prefix++ 303 mask >>= 1 304 continue 305 } 306 307 break 308 } 309 310 break 311 } 312 313 return prefix 314 } 315 316 // AddressMask is a bitmask for an address. 317 // 318 // +stateify savable 319 type AddressMask struct { 320 mask string 321 } 322 323 // MaskFrom returns a Mask based on str. 324 func MaskFrom(str string) AddressMask { 325 return AddressMask{mask: str} 326 } 327 328 // MaskFromBytes returns a Mask based on bs. 329 func MaskFromBytes(bs []byte) AddressMask { 330 return AddressMask{mask: string(bs)} 331 } 332 333 // String implements Stringer. 334 func (m AddressMask) String() string { 335 return fmt.Sprintf("%x", m.mask) 336 } 337 338 // AsSlice returns a as a byte slice. Callers should be careful as it can 339 // return a window into existing memory. 340 func (m *AddressMask) AsSlice() []byte { 341 return []byte(m.mask) 342 } 343 344 // BitLen returns the length of the mask in bits. 345 func (m AddressMask) BitLen() int { 346 return len(m.mask) * 8 347 } 348 349 // Len returns the length of the mask in bytes. 350 func (m AddressMask) Len() int { 351 return len(m.mask) 352 } 353 354 // Prefix returns the number of bits before the first host bit. 355 func (m AddressMask) Prefix() int { 356 p := 0 357 for _, b := range []byte(m.mask) { 358 p += bits.LeadingZeros8(^b) 359 } 360 return p 361 } 362 363 // Equal returns whether m and other are equal. It exists for use by the cmp 364 // library. 
365 func (m AddressMask) Equal(other AddressMask) bool { 366 return m == other 367 } 368 369 // Subnet is a subnet defined by its address and mask. 370 type Subnet struct { 371 address Address 372 mask AddressMask 373 } 374 375 // NewSubnet creates a new Subnet, checking that the address and mask are the same length. 376 func NewSubnet(a Address, m AddressMask) (Subnet, error) { 377 if a.Len() != m.Len() { 378 return Subnet{}, errSubnetLengthMismatch 379 } 380 for i := 0; i < a.Len(); i++ { 381 if a.addr[i]&^m.mask[i] != 0 { 382 return Subnet{}, errSubnetAddressMasked 383 } 384 } 385 return Subnet{a, m}, nil 386 } 387 388 // String implements Stringer. 389 func (s Subnet) String() string { 390 return fmt.Sprintf("%s/%d", s.ID(), s.Prefix()) 391 } 392 393 // Contains returns true iff the address is of the same length and matches the 394 // subnet address and mask. 395 func (s *Subnet) Contains(a Address) bool { 396 if a.Len() != s.address.Len() { 397 return false 398 } 399 for i := 0; i < a.Len(); i++ { 400 if a.addr[i]&s.mask.mask[i] != s.address.addr[i] { 401 return false 402 } 403 } 404 return true 405 } 406 407 // ID returns the subnet ID. 408 func (s *Subnet) ID() Address { 409 return s.address 410 } 411 412 // Bits returns the number of ones (network bits) and zeros (host bits) in the 413 // subnet mask. 414 func (s *Subnet) Bits() (ones int, zeros int) { 415 ones = s.mask.Prefix() 416 return ones, s.mask.BitLen() - ones 417 } 418 419 // Prefix returns the number of bits before the first host bit. 420 func (s *Subnet) Prefix() int { 421 return s.mask.Prefix() 422 } 423 424 // Mask returns the subnet mask. 425 func (s *Subnet) Mask() AddressMask { 426 return s.mask 427 } 428 429 // Broadcast returns the subnet's broadcast address. 
430 func (s *Subnet) Broadcast() Address { 431 addrCopy := s.address 432 for i := 0; i < addrCopy.Len(); i++ { 433 addrCopy.addr[i] |= ^s.mask.mask[i] 434 } 435 return addrCopy 436 } 437 438 // IsBroadcast returns true if the address is considered a broadcast address. 439 func (s *Subnet) IsBroadcast(address Address) bool { 440 // Only IPv4 supports the notion of a broadcast address. 441 if address.Len() != ipv4AddressSize { 442 return false 443 } 444 445 // Normally, we would just compare address with the subnet's broadcast 446 // address but there is an exception where a simple comparison is not 447 // correct. This exception is for /31 and /32 IPv4 subnets where all 448 // addresses are considered valid host addresses. 449 // 450 // For /31 subnets, the case is easy. RFC 3021 Section 2.1 states that 451 // both addresses in a /31 subnet "MUST be interpreted as host addresses." 452 // 453 // For /32, the case is a bit more vague. RFC 3021 makes no mention of /32 454 // subnets. However, the same reasoning applies - if an exception is not 455 // made, then there do not exist any host addresses in a /32 subnet. RFC 456 // 4632 Section 3.1 also vaguely implies this interpretation by referring 457 // to addresses in /32 subnets as "host routes." 458 return s.Prefix() <= 30 && s.Broadcast() == address 459 } 460 461 // Equal returns true if this Subnet is equal to the given Subnet. 462 func (s Subnet) Equal(o Subnet) bool { 463 // If this changes, update Route.Equal accordingly. 464 return s == o 465 } 466 467 // NICID is a number that uniquely identifies a NIC. 468 type NICID int32 469 470 // ShutdownFlags represents flags that can be passed to the Shutdown() method 471 // of the Endpoint interface. 472 type ShutdownFlags int 473 474 // Values of the flags that can be passed to the Shutdown() method. They can 475 // be OR'ed together. 
476 const ( 477 ShutdownRead ShutdownFlags = 1 << iota 478 ShutdownWrite 479 ) 480 481 // PacketType is used to indicate the destination of the packet. 482 type PacketType uint8 483 484 const ( 485 // PacketHost indicates a packet addressed to the local host. 486 PacketHost PacketType = iota 487 488 // PacketOtherHost indicates an outgoing packet addressed to 489 // another host caught by a NIC in promiscuous mode. 490 PacketOtherHost 491 492 // PacketOutgoing for a packet originating from the local host 493 // that is looped back to a packet socket. 494 PacketOutgoing 495 496 // PacketBroadcast indicates a link layer broadcast packet. 497 PacketBroadcast 498 499 // PacketMulticast indicates a link layer multicast packet. 500 PacketMulticast 501 ) 502 503 // FullAddress represents a full transport node address, as required by the 504 // Connect() and Bind() methods. 505 // 506 // +stateify savable 507 type FullAddress struct { 508 // NIC is the ID of the NIC this address refers to. 509 // 510 // This may not be used by all endpoint types. 511 NIC NICID 512 513 // Addr is the network address. 514 Addr Address 515 516 // Port is the transport port. 517 // 518 // This may not be used by all endpoint types. 519 Port uint16 520 521 // LinkAddr is the link layer address. 522 LinkAddr LinkAddress 523 } 524 525 // Payloader is an interface that provides data. 526 // 527 // This interface allows the endpoint to request the amount of data it needs 528 // based on internal buffers without exposing them. 529 type Payloader interface { 530 io.Reader 531 532 // Len returns the number of bytes of the unread portion of the 533 // Reader. 534 Len() int 535 } 536 537 var _ Payloader = (*bytes.Buffer)(nil) 538 var _ Payloader = (*bytes.Reader)(nil) 539 540 var _ io.Writer = (*SliceWriter)(nil) 541 542 // SliceWriter implements io.Writer for slices. 543 type SliceWriter []byte 544 545 // Write implements io.Writer.Write. 
546 func (s *SliceWriter) Write(b []byte) (int, error) { 547 n := copy(*s, b) 548 *s = (*s)[n:] 549 var err error 550 if n != len(b) { 551 err = io.ErrShortWrite 552 } 553 return n, err 554 } 555 556 var _ io.Writer = (*LimitedWriter)(nil) 557 558 // A LimitedWriter writes to W but limits the amount of data copied to just N 559 // bytes. Each call to Write updates N to reflect the new amount remaining. 560 type LimitedWriter struct { 561 W io.Writer 562 N int64 563 } 564 565 func (l *LimitedWriter) Write(p []byte) (int, error) { 566 pLen := int64(len(p)) 567 if pLen > l.N { 568 p = p[:l.N] 569 } 570 n, err := l.W.Write(p) 571 n64 := int64(n) 572 if err == nil && n64 != pLen { 573 err = io.ErrShortWrite 574 } 575 l.N -= n64 576 return n, err 577 } 578 579 // SendableControlMessages contains socket control messages that can be written. 580 // 581 // +stateify savable 582 type SendableControlMessages struct { 583 // HasTTL indicates whether TTL is valid/set. 584 HasTTL bool 585 586 // TTL is the IPv4 Time To Live of the associated packet. 587 TTL uint8 588 589 // HasHopLimit indicates whether HopLimit is valid/set. 590 HasHopLimit bool 591 592 // HopLimit is the IPv6 Hop Limit of the associated packet. 593 HopLimit uint8 594 595 // HasIPv6PacketInfo indicates whether IPv6PacketInfo is set. 596 HasIPv6PacketInfo bool 597 598 // IPv6PacketInfo holds interface and address data on an incoming packet. 599 IPv6PacketInfo IPv6PacketInfo 600 } 601 602 // ReceivableControlMessages contains socket control messages that can be 603 // received. 604 // 605 // +stateify savable 606 type ReceivableControlMessages struct { 607 // Timestamp is the time that the last packet used to create the read data 608 // was received. 609 Timestamp time.Time `state:".(int64)"` 610 611 // HasInq indicates whether Inq is valid/set. 612 HasInq bool 613 614 // Inq is the number of bytes ready to be received. 615 Inq int32 616 617 // HasTOS indicates whether TOS is valid/set. 
618 HasTOS bool 619 620 // TOS is the IPv4 type of service of the associated packet. 621 TOS uint8 622 623 // HasTTL indicates whether TTL is valid/set. 624 HasTTL bool 625 626 // TTL is the IPv4 Time To Live of the associated packet. 627 TTL uint8 628 629 // HasHopLimit indicates whether HopLimit is valid/set. 630 HasHopLimit bool 631 632 // HopLimit is the IPv6 Hop Limit of the associated packet. 633 HopLimit uint8 634 635 // HasTimestamp indicates whether Timestamp is valid/set. 636 HasTimestamp bool 637 638 // HasTClass indicates whether TClass is valid/set. 639 HasTClass bool 640 641 // TClass is the IPv6 traffic class of the associated packet. 642 TClass uint32 643 644 // HasIPPacketInfo indicates whether PacketInfo is set. 645 HasIPPacketInfo bool 646 647 // PacketInfo holds interface and address data on an incoming packet. 648 PacketInfo IPPacketInfo 649 650 // HasIPv6PacketInfo indicates whether IPv6PacketInfo is set. 651 HasIPv6PacketInfo bool 652 653 // IPv6PacketInfo holds interface and address data on an incoming packet. 654 IPv6PacketInfo IPv6PacketInfo 655 656 // HasOriginalDestinationAddress indicates whether OriginalDstAddress is 657 // set. 658 HasOriginalDstAddress bool 659 660 // OriginalDestinationAddress holds the original destination address 661 // and port of the incoming packet. 662 OriginalDstAddress FullAddress 663 664 // SockErr is the dequeued socket error on recvmsg(MSG_ERRQUEUE). 665 SockErr *SockError 666 } 667 668 // PacketOwner is used to get UID and GID of the packet. 669 type PacketOwner interface { 670 // KUID returns KUID of the packet. 671 KUID() uint32 672 673 // KGID returns KGID of the packet. 674 KGID() uint32 675 } 676 677 // ReadOptions contains options for Endpoint.Read. 678 type ReadOptions struct { 679 // Peek indicates whether this read is a peek. 680 Peek bool 681 682 // NeedRemoteAddr indicates whether to return the remote address, if 683 // supported. 
684 NeedRemoteAddr bool 685 686 // NeedLinkPacketInfo indicates whether to return the link-layer information, 687 // if supported. 688 NeedLinkPacketInfo bool 689 } 690 691 // ReadResult represents result for a successful Endpoint.Read. 692 type ReadResult struct { 693 // Count is the number of bytes received and written to the buffer. 694 Count int 695 696 // Total is the number of bytes of the received packet. This can be used to 697 // determine whether the read is truncated. 698 Total int 699 700 // ControlMessages is the control messages received. 701 ControlMessages ReceivableControlMessages 702 703 // RemoteAddr is the remote address if ReadOptions.NeedAddr is true. 704 RemoteAddr FullAddress 705 706 // LinkPacketInfo is the link-layer information of the received packet if 707 // ReadOptions.NeedLinkPacketInfo is true. 708 LinkPacketInfo LinkPacketInfo 709 } 710 711 // Endpoint is the interface implemented by transport protocols (e.g., tcp, udp) 712 // that exposes functionality like read, write, connect, etc. to users of the 713 // networking stack. 714 type Endpoint interface { 715 // Close puts the endpoint in a closed state and frees all resources 716 // associated with it. Close initiates the teardown process, the 717 // Endpoint may not be fully closed when Close returns. 718 Close() 719 720 // Abort initiates an expedited endpoint teardown. As compared to 721 // Close, Abort prioritizes closing the Endpoint quickly over cleanly. 722 // Abort is best effort; implementing Abort with Close is acceptable. 723 Abort() 724 725 // Read reads data from the endpoint and optionally writes to dst. 726 // 727 // This method does not block if there is no data pending; in this case, 728 // ErrWouldBlock is returned. 729 // 730 // If non-zero number of bytes are successfully read and written to dst, err 731 // must be nil. Otherwise, if dst failed to write anything, ErrBadBuffer 732 // should be returned. 
733 Read(io.Writer, ReadOptions) (ReadResult, Error) 734 735 // Write writes data to the endpoint's peer. This method does not block if 736 // the data cannot be written. 737 // 738 // Unlike io.Writer.Write, Endpoint.Write transfers ownership of any bytes 739 // successfully written to the Endpoint. That is, if a call to 740 // Write(SlicePayload{data}) returns (n, err), it may retain data[:n], and 741 // the caller should not use data[:n] after Write returns. 742 // 743 // Note that unlike io.Writer.Write, it is not an error for Write to 744 // perform a partial write (if n > 0, no error may be returned). Only 745 // stream (TCP) Endpoints may return partial writes, and even then only 746 // in the case where writing additional data would block. Other Endpoints 747 // will either write the entire message or return an error. 748 Write(Payloader, WriteOptions) (int64, Error) 749 750 // Connect connects the endpoint to its peer. Specifying a NIC is 751 // optional. 752 // 753 // There are three classes of return values: 754 // nil -- the attempt to connect succeeded. 755 // ErrConnectStarted/ErrAlreadyConnecting -- the connect attempt started 756 // but hasn't completed yet. In this case, the caller must call Connect 757 // or GetSockOpt(ErrorOption) when the endpoint becomes writable to 758 // get the actual result. The first call to Connect after the socket has 759 // connected returns nil. Calling connect again results in ErrAlreadyConnected. 760 // Anything else -- the attempt to connect failed. 761 // 762 // If address.Addr is empty, this means that Endpoint has to be 763 // disconnected if this is supported, otherwise 764 // ErrAddressFamilyNotSupported must be returned. 765 Connect(address FullAddress) Error 766 767 // Disconnect disconnects the endpoint from its peer. 768 Disconnect() Error 769 770 // Shutdown closes the read and/or write end of the endpoint connection 771 // to its peer. 
772 Shutdown(flags ShutdownFlags) Error 773 774 // Listen puts the endpoint in "listen" mode, which allows it to accept 775 // new connections. 776 Listen(backlog int) Error 777 778 // Accept returns a new endpoint if a peer has established a connection 779 // to an endpoint previously set to listen mode. This method does not 780 // block if no new connections are available. 781 // 782 // The returned Queue is the wait queue for the newly created endpoint. 783 // 784 // If peerAddr is not nil then it is populated with the peer address of the 785 // returned endpoint. 786 Accept(peerAddr *FullAddress) (Endpoint, *waiter.Queue, Error) 787 788 // Bind binds the endpoint to a specific local address and port. 789 // Specifying a NIC is optional. 790 Bind(address FullAddress) Error 791 792 // GetLocalAddress returns the address to which the endpoint is bound. 793 GetLocalAddress() (FullAddress, Error) 794 795 // GetRemoteAddress returns the address to which the endpoint is 796 // connected. 797 GetRemoteAddress() (FullAddress, Error) 798 799 // Readiness returns the current readiness of the endpoint. For example, 800 // if waiter.EventIn is set, the endpoint is immediately readable. 801 Readiness(mask waiter.EventMask) waiter.EventMask 802 803 // SetSockOpt sets a socket option. 804 SetSockOpt(opt SettableSocketOption) Error 805 806 // SetSockOptInt sets a socket option, for simple cases where a value 807 // has the int type. 808 SetSockOptInt(opt SockOptInt, v int) Error 809 810 // GetSockOpt gets a socket option. 811 GetSockOpt(opt GettableSocketOption) Error 812 813 // GetSockOptInt gets a socket option for simple cases where a return 814 // value has the int type. 815 GetSockOptInt(SockOptInt) (int, Error) 816 817 // State returns a socket's lifecycle state. The returned value is 818 // protocol-specific and is primarily used for diagnostics. 819 State() uint32 820 821 // ModerateRecvBuf should be called everytime data is copied to the user 822 // space. 
This allows for dynamic tuning of recv buffer space for a 823 // given socket. 824 // 825 // NOTE: This method is a no-op for sockets other than TCP. 826 ModerateRecvBuf(copied int) 827 828 // Info returns a copy to the transport endpoint info. 829 Info() EndpointInfo 830 831 // Stats returns a reference to the endpoint stats. 832 Stats() EndpointStats 833 834 // SetOwner sets the task owner to the endpoint owner. 835 SetOwner(owner PacketOwner) 836 837 // LastError clears and returns the last error reported by the endpoint. 838 LastError() Error 839 840 // SocketOptions returns the structure which contains all the socket 841 // level options. 842 SocketOptions() *SocketOptions 843 } 844 845 // EndpointWithPreflight is the interface implemented by endpoints that need 846 // to expose the `Preflight` method for preparing the endpoint prior to 847 // calling `Write`. 848 type EndpointWithPreflight interface { 849 // Prepares the endpoint for writes using the provided WriteOptions, 850 // returning an error if the options were incompatible with the endpoint's 851 // current state. 852 Preflight(WriteOptions) Error 853 } 854 855 // LinkPacketInfo holds Link layer information for a received packet. 856 // 857 // +stateify savable 858 type LinkPacketInfo struct { 859 // Protocol is the NetworkProtocolNumber for the packet. 860 Protocol NetworkProtocolNumber 861 862 // PktType is used to indicate the destination of the packet. 863 PktType PacketType 864 } 865 866 // EndpointInfo is the interface implemented by each endpoint info struct. 867 type EndpointInfo interface { 868 // IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo 869 // marker interface. 870 IsEndpointInfo() 871 } 872 873 // EndpointStats is the interface implemented by each endpoint stats struct. 874 type EndpointStats interface { 875 // IsEndpointStats is an empty method to implement the tcpip.EndpointStats 876 // marker interface. 
877 IsEndpointStats() 878 } 879 880 // WriteOptions contains options for Endpoint.Write. 881 type WriteOptions struct { 882 // If To is not nil, write to the given address instead of the endpoint's 883 // peer. 884 To *FullAddress 885 886 // More has the same semantics as Linux's MSG_MORE. 887 More bool 888 889 // EndOfRecord has the same semantics as Linux's MSG_EOR. 890 EndOfRecord bool 891 892 // Atomic means that all data fetched from Payloader must be written to the 893 // endpoint. If Atomic is false, then data fetched from the Payloader may be 894 // discarded if available endpoint buffer space is unsufficient. 895 Atomic bool 896 897 // ControlMessages contains optional overrides used when writing a packet. 898 ControlMessages SendableControlMessages 899 } 900 901 // SockOptInt represents socket options which values have the int type. 902 type SockOptInt int 903 904 const ( 905 // KeepaliveCountOption is used by SetSockOptInt/GetSockOptInt to 906 // specify the number of un-ACKed TCP keepalives that will be sent 907 // before the connection is closed. 908 KeepaliveCountOption SockOptInt = iota 909 910 // IPv4TOSOption is used by SetSockOptInt/GetSockOptInt to specify TOS 911 // for all subsequent outgoing IPv4 packets from the endpoint. 912 IPv4TOSOption 913 914 // IPv6TrafficClassOption is used by SetSockOptInt/GetSockOptInt to 915 // specify TOS for all subsequent outgoing IPv6 packets from the 916 // endpoint. 917 IPv6TrafficClassOption 918 919 // MaxSegOption is used by SetSockOptInt/GetSockOptInt to set/get the 920 // current Maximum Segment Size(MSS) value as specified using the 921 // TCP_MAXSEG option. 922 MaxSegOption 923 924 // MTUDiscoverOption is used to set/get the path MTU discovery setting. 925 // 926 // NOTE: Setting this option to any other value than PMTUDiscoveryDont 927 // is not supported and will fail as such, and getting this option will 928 // always return PMTUDiscoveryDont. 
929 MTUDiscoverOption 930 931 // MulticastTTLOption is used by SetSockOptInt/GetSockOptInt to control 932 // the default TTL value for multicast messages. The default is 1. 933 MulticastTTLOption 934 935 // ReceiveQueueSizeOption is used in GetSockOptInt to specify that the 936 // number of unread bytes in the input buffer should be returned. 937 ReceiveQueueSizeOption 938 939 // SendQueueSizeOption is used in GetSockOptInt to specify that the 940 // number of unread bytes in the output buffer should be returned. 941 SendQueueSizeOption 942 943 // IPv4TTLOption is used by SetSockOptInt/GetSockOptInt to control the default 944 // TTL value for unicast messages. 945 // 946 // The default is configured by DefaultTTLOption. A UseDefaultIPv4TTL value 947 // configures the endpoint to use the default. 948 IPv4TTLOption 949 950 // IPv6HopLimitOption is used by SetSockOptInt/GetSockOptInt to control the 951 // default hop limit value for unicast messages. 952 // 953 // The default is configured by DefaultTTLOption. A UseDefaultIPv6HopLimit 954 // value configures the endpoint to use the default. 955 IPv6HopLimitOption 956 957 // TCPSynCountOption is used by SetSockOptInt/GetSockOptInt to specify 958 // the number of SYN retransmits that TCP should send before aborting 959 // the attempt to connect. It cannot exceed 255. 960 // 961 // NOTE: This option is currently only stubbed out and is no-op. 962 TCPSynCountOption 963 964 // TCPWindowClampOption is used by SetSockOptInt/GetSockOptInt to bound 965 // the size of the advertised window to this value. 966 // 967 // NOTE: This option is currently only stubed out and is a no-op 968 TCPWindowClampOption 969 970 // IPv6Checksum is used to request the stack to populate and validate the IPv6 971 // checksum for transport level headers. 
972 IPv6Checksum 973 ) 974 975 const ( 976 // UseDefaultIPv4TTL is the IPv4TTLOption value that configures an endpoint to 977 // use the default ttl currently configured by the IPv4 protocol (see 978 // DefaultTTLOption). 979 UseDefaultIPv4TTL = 0 980 981 // UseDefaultIPv6HopLimit is the IPv6HopLimitOption value that configures an 982 // endpoint to use the default hop limit currently configured by the IPv6 983 // protocol (see DefaultTTLOption). 984 UseDefaultIPv6HopLimit = -1 985 ) 986 987 const ( 988 // PMTUDiscoveryWant is a setting of the MTUDiscoverOption to use 989 // per-route settings. 990 PMTUDiscoveryWant int = iota 991 992 // PMTUDiscoveryDont is a setting of the MTUDiscoverOption to disable 993 // path MTU discovery. 994 PMTUDiscoveryDont 995 996 // PMTUDiscoveryDo is a setting of the MTUDiscoverOption to always do 997 // path MTU discovery. 998 PMTUDiscoveryDo 999 1000 // PMTUDiscoveryProbe is a setting of the MTUDiscoverOption to set DF 1001 // but ignore path MTU. 1002 PMTUDiscoveryProbe 1003 ) 1004 1005 // GettableNetworkProtocolOption is a marker interface for network protocol 1006 // options that may be queried. 1007 type GettableNetworkProtocolOption interface { 1008 isGettableNetworkProtocolOption() 1009 } 1010 1011 // SettableNetworkProtocolOption is a marker interface for network protocol 1012 // options that may be set. 1013 type SettableNetworkProtocolOption interface { 1014 isSettableNetworkProtocolOption() 1015 } 1016 1017 // DefaultTTLOption is used by stack.(*Stack).NetworkProtocolOption to specify 1018 // a default TTL. 1019 type DefaultTTLOption uint8 1020 1021 func (*DefaultTTLOption) isGettableNetworkProtocolOption() {} 1022 1023 func (*DefaultTTLOption) isSettableNetworkProtocolOption() {} 1024 1025 // GettableTransportProtocolOption is a marker interface for transport protocol 1026 // options that may be queried. 
type GettableTransportProtocolOption interface {
	isGettableTransportProtocolOption()
}

// SettableTransportProtocolOption is a marker interface for transport protocol
// options that may be set.
type SettableTransportProtocolOption interface {
	isSettableTransportProtocolOption()
}

// TCPSACKEnabled enables/disables the SACK option for TCP.
//
// See: https://tools.ietf.org/html/rfc2018.
type TCPSACKEnabled bool

func (*TCPSACKEnabled) isGettableTransportProtocolOption() {}

func (*TCPSACKEnabled) isSettableTransportProtocolOption() {}

// TCPRecovery is the loss detection algorithm used by TCP.
type TCPRecovery int32

func (*TCPRecovery) isGettableTransportProtocolOption() {}

func (*TCPRecovery) isSettableTransportProtocolOption() {}

// TCPAlwaysUseSynCookies indicates unconditional usage of syncookies.
type TCPAlwaysUseSynCookies bool

func (*TCPAlwaysUseSynCookies) isGettableTransportProtocolOption() {}

func (*TCPAlwaysUseSynCookies) isSettableTransportProtocolOption() {}

// Each TCPRecovery constant below occupies its own bit (1 << iota).
const (
	// TCPRACKLossDetection indicates RACK is used for loss detection and
	// recovery.
	TCPRACKLossDetection TCPRecovery = 1 << iota

	// TCPRACKStaticReoWnd indicates the reordering window should not be
	// adjusted when DSACK is received.
	TCPRACKStaticReoWnd

	// TCPRACKNoDupTh indicates RACK should not consider the classic three
	// duplicate acknowledgements rule to mark the segments as lost. This
	// is used when reordering is not detected.
	TCPRACKNoDupTh
)

// TCPDelayEnabled enables/disables Nagle's algorithm in TCP.
type TCPDelayEnabled bool

func (*TCPDelayEnabled) isGettableTransportProtocolOption() {}

func (*TCPDelayEnabled) isSettableTransportProtocolOption() {}

// TCPSendBufferSizeRangeOption is the send buffer size range for TCP.
type TCPSendBufferSizeRangeOption struct {
	Min     int
	Default int
	Max     int
}

func (*TCPSendBufferSizeRangeOption) isGettableTransportProtocolOption() {}

func (*TCPSendBufferSizeRangeOption) isSettableTransportProtocolOption() {}

// TCPReceiveBufferSizeRangeOption is the receive buffer size range for TCP.
type TCPReceiveBufferSizeRangeOption struct {
	Min     int
	Default int
	Max     int
}

func (*TCPReceiveBufferSizeRangeOption) isGettableTransportProtocolOption() {}

func (*TCPReceiveBufferSizeRangeOption) isSettableTransportProtocolOption() {}

// TCPAvailableCongestionControlOption is the supported congestion control
// algorithms for TCP.
type TCPAvailableCongestionControlOption string

func (*TCPAvailableCongestionControlOption) isGettableTransportProtocolOption() {}

func (*TCPAvailableCongestionControlOption) isSettableTransportProtocolOption() {}

// TCPModerateReceiveBufferOption enables/disables receive buffer moderation
// for TCP.
type TCPModerateReceiveBufferOption bool

func (*TCPModerateReceiveBufferOption) isGettableTransportProtocolOption() {}

func (*TCPModerateReceiveBufferOption) isSettableTransportProtocolOption() {}

// GettableSocketOption is a marker interface for socket options that may be
// queried.
type GettableSocketOption interface {
	isGettableSocketOption()
}

// SettableSocketOption is a marker interface for socket options that may be
// configured.
type SettableSocketOption interface {
	isSettableSocketOption()
}

// ICMPv6Filter specifies a filter for ICMPv6 types.
//
// +stateify savable
type ICMPv6Filter struct {
	// DenyType indicates if an ICMP type should be blocked.
	//
	// The ICMPv6 type field is 8 bits so there are up to 256 different ICMPv6
	// types.
	DenyType [8]uint32
}

// ShouldDeny returns true iff the ICMPv6 Type should be denied.
//
// DenyType is read as a 256-bit bitmap: the type's word is icmpType/32 and
// its bit within that word is icmpType%32.
func (f *ICMPv6Filter) ShouldDeny(icmpType uint8) bool {
	const bitsInUint32 = 32
	// A uint8 divided by 32 is at most 7, so the index is always in range.
	word := f.DenyType[icmpType/bitsInUint32]
	mask := uint32(1) << (icmpType % bitsInUint32)
	return word&mask != 0
}

func (*ICMPv6Filter) isGettableSocketOption() {}

func (*ICMPv6Filter) isSettableSocketOption() {}

// EndpointState represents the state of an endpoint.
type EndpointState uint8

// CongestionControlState indicates the current congestion control state for
// TCP sender.
type CongestionControlState int

const (
	// Open indicates that the sender is receiving acks in order and
	// no loss or dupACK's etc have been detected.
	Open CongestionControlState = iota
	// RTORecovery indicates that an RTO has occurred and the sender
	// has entered an RTO based recovery phase.
	RTORecovery
	// FastRecovery indicates that the sender has entered FastRecovery
	// based on receiving nDupAck's. This state is entered only when
	// SACK is not in use.
	FastRecovery
	// SACKRecovery indicates that the sender has entered SACK based
	// recovery.
	SACKRecovery
	// Disorder indicates the sender either received some SACK blocks
	// or dupACK's.
	Disorder
)

// TCPInfoOption is used by GetSockOpt to expose TCP statistics.
//
// TODO(b/64800844): Add and populate stat fields.
type TCPInfoOption struct {
	// RTT is the smoothed round trip time.
	RTT time.Duration

	// RTTVar is the round trip time variation.
	RTTVar time.Duration

	// RTO is the retransmission timeout for the endpoint.
	RTO time.Duration

	// State is the current endpoint protocol state.
	State EndpointState

	// CcState is the congestion control state.
	CcState CongestionControlState

	// SndCwnd is the congestion window, in packets.
	SndCwnd uint32

	// SndSsthresh is the threshold between slow start and congestion
	// avoidance.
	SndSsthresh uint32

	// ReorderSeen indicates if reordering is seen in the endpoint.
	ReorderSeen bool
}

func (*TCPInfoOption) isGettableSocketOption() {}

// KeepaliveIdleOption is used by SetSockOpt/GetSockOpt to specify the time a
// connection must remain idle before the first TCP keepalive packet is sent.
// Once this time is reached, KeepaliveIntervalOption is used instead.
type KeepaliveIdleOption time.Duration

func (*KeepaliveIdleOption) isGettableSocketOption() {}

func (*KeepaliveIdleOption) isSettableSocketOption() {}

// KeepaliveIntervalOption is used by SetSockOpt/GetSockOpt to specify the
// interval between sending TCP keepalive packets.
type KeepaliveIntervalOption time.Duration

func (*KeepaliveIntervalOption) isGettableSocketOption() {}

func (*KeepaliveIntervalOption) isSettableSocketOption() {}

// TCPUserTimeoutOption is used by SetSockOpt/GetSockOpt to specify a user
// specified timeout for a given TCP connection.
// See: RFC5482 for details.
type TCPUserTimeoutOption time.Duration

func (*TCPUserTimeoutOption) isGettableSocketOption() {}

func (*TCPUserTimeoutOption) isSettableSocketOption() {}

// CongestionControlOption is used by SetSockOpt/GetSockOpt to set/get
// the current congestion control algorithm.
type CongestionControlOption string

func (*CongestionControlOption) isGettableSocketOption() {}

func (*CongestionControlOption) isSettableSocketOption() {}

func (*CongestionControlOption) isGettableTransportProtocolOption() {}

func (*CongestionControlOption) isSettableTransportProtocolOption() {}

// TCPLingerTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum duration for which a socket lingers in the TCP_FIN_WAIT_2 state
// before being marked closed.
type TCPLingerTimeoutOption time.Duration

func (*TCPLingerTimeoutOption) isGettableSocketOption() {}

func (*TCPLingerTimeoutOption) isSettableSocketOption() {}

func (*TCPLingerTimeoutOption) isGettableTransportProtocolOption() {}

func (*TCPLingerTimeoutOption) isSettableTransportProtocolOption() {}

// TCPTimeWaitTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum duration for which a socket lingers in the TIME_WAIT state
// before being marked closed.
type TCPTimeWaitTimeoutOption time.Duration

func (*TCPTimeWaitTimeoutOption) isGettableSocketOption() {}

func (*TCPTimeWaitTimeoutOption) isSettableSocketOption() {}

func (*TCPTimeWaitTimeoutOption) isGettableTransportProtocolOption() {}

func (*TCPTimeWaitTimeoutOption) isSettableTransportProtocolOption() {}

// TCPDeferAcceptOption is used by SetSockOpt/GetSockOpt to allow an
// accept to return a completed connection only when there is data to be
// read. This usually means the listening socket will drop the final ACK
// for a handshake, up to the specified timeout, until a segment with data
// arrives.
type TCPDeferAcceptOption time.Duration

func (*TCPDeferAcceptOption) isGettableSocketOption() {}

func (*TCPDeferAcceptOption) isSettableSocketOption() {}

// TCPMinRTOOption is used by SetSockOpt/GetSockOpt to allow overriding
// default MinRTO used by the Stack.
type TCPMinRTOOption time.Duration

func (*TCPMinRTOOption) isGettableSocketOption() {}

func (*TCPMinRTOOption) isSettableSocketOption() {}

func (*TCPMinRTOOption) isGettableTransportProtocolOption() {}

func (*TCPMinRTOOption) isSettableTransportProtocolOption() {}

// TCPMaxRTOOption is used by SetSockOpt/GetSockOpt to allow overriding
// default MaxRTO used by the Stack.
type TCPMaxRTOOption time.Duration

func (*TCPMaxRTOOption) isGettableSocketOption() {}

func (*TCPMaxRTOOption) isSettableSocketOption() {}

func (*TCPMaxRTOOption) isGettableTransportProtocolOption() {}

func (*TCPMaxRTOOption) isSettableTransportProtocolOption() {}

// TCPMaxRetriesOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum number of retransmits after which we time out the connection.
type TCPMaxRetriesOption uint64

func (*TCPMaxRetriesOption) isGettableSocketOption() {}

func (*TCPMaxRetriesOption) isSettableSocketOption() {}

func (*TCPMaxRetriesOption) isGettableTransportProtocolOption() {}

func (*TCPMaxRetriesOption) isSettableTransportProtocolOption() {}

// TCPSynRetriesOption is used by SetSockOpt/GetSockOpt to specify stack-wide
// default for number of times SYN is retransmitted before aborting a connect.
1325 type TCPSynRetriesOption uint8 1326 1327 func (*TCPSynRetriesOption) isGettableSocketOption() {} 1328 1329 func (*TCPSynRetriesOption) isSettableSocketOption() {} 1330 1331 func (*TCPSynRetriesOption) isGettableTransportProtocolOption() {} 1332 1333 func (*TCPSynRetriesOption) isSettableTransportProtocolOption() {} 1334 1335 // MulticastInterfaceOption is used by SetSockOpt/GetSockOpt to specify a 1336 // default interface for multicast. 1337 type MulticastInterfaceOption struct { 1338 NIC NICID 1339 InterfaceAddr Address 1340 } 1341 1342 func (*MulticastInterfaceOption) isGettableSocketOption() {} 1343 1344 func (*MulticastInterfaceOption) isSettableSocketOption() {} 1345 1346 // MembershipOption is used to identify a multicast membership on an interface. 1347 type MembershipOption struct { 1348 NIC NICID 1349 InterfaceAddr Address 1350 MulticastAddr Address 1351 } 1352 1353 // AddMembershipOption identifies a multicast group to join on some interface. 1354 type AddMembershipOption MembershipOption 1355 1356 func (*AddMembershipOption) isSettableSocketOption() {} 1357 1358 // RemoveMembershipOption identifies a multicast group to leave on some 1359 // interface. 1360 type RemoveMembershipOption MembershipOption 1361 1362 func (*RemoveMembershipOption) isSettableSocketOption() {} 1363 1364 // SocketDetachFilterOption is used by SetSockOpt to detach a previously attached 1365 // classic BPF filter on a given endpoint. 1366 type SocketDetachFilterOption int 1367 1368 func (*SocketDetachFilterOption) isSettableSocketOption() {} 1369 1370 // OriginalDestinationOption is used to get the original destination address 1371 // and port of a redirected packet. 
1372 type OriginalDestinationOption FullAddress 1373 1374 func (*OriginalDestinationOption) isGettableSocketOption() {} 1375 1376 // TCPTimeWaitReuseOption is used stack.(*Stack).TransportProtocolOption to 1377 // specify if the stack can reuse the port bound by an endpoint in TIME-WAIT for 1378 // new connections when it is safe from protocol viewpoint. 1379 type TCPTimeWaitReuseOption uint8 1380 1381 func (*TCPTimeWaitReuseOption) isGettableSocketOption() {} 1382 1383 func (*TCPTimeWaitReuseOption) isSettableSocketOption() {} 1384 1385 func (*TCPTimeWaitReuseOption) isGettableTransportProtocolOption() {} 1386 1387 func (*TCPTimeWaitReuseOption) isSettableTransportProtocolOption() {} 1388 1389 const ( 1390 // TCPTimeWaitReuseDisabled indicates reuse of port bound by endponts in TIME-WAIT cannot 1391 // be reused for new connections. 1392 TCPTimeWaitReuseDisabled TCPTimeWaitReuseOption = iota 1393 1394 // TCPTimeWaitReuseGlobal indicates reuse of port bound by endponts in TIME-WAIT can 1395 // be reused for new connections irrespective of the src/dest addresses. 1396 TCPTimeWaitReuseGlobal 1397 1398 // TCPTimeWaitReuseLoopbackOnly indicates reuse of port bound by endpoint in TIME-WAIT can 1399 // only be reused if the connection was a connection over loopback. i.e src/dest adddresses 1400 // are loopback addresses. 1401 TCPTimeWaitReuseLoopbackOnly 1402 ) 1403 1404 // LingerOption is used by SetSockOpt/GetSockOpt to set/get the 1405 // duration for which a socket lingers before returning from Close. 1406 // 1407 // +marshal 1408 // +stateify savable 1409 type LingerOption struct { 1410 Enabled bool 1411 Timeout time.Duration 1412 } 1413 1414 // IPPacketInfo is the message structure for IP_PKTINFO. 1415 // 1416 // +stateify savable 1417 type IPPacketInfo struct { 1418 // NIC is the ID of the NIC to be used. 1419 NIC NICID 1420 1421 // LocalAddr is the local address. 
1422 LocalAddr Address 1423 1424 // DestinationAddr is the destination address found in the IP header. 1425 DestinationAddr Address 1426 } 1427 1428 // IPv6PacketInfo is the message structure for IPV6_PKTINFO. 1429 // 1430 // +stateify savable 1431 type IPv6PacketInfo struct { 1432 Addr Address 1433 NIC NICID 1434 } 1435 1436 // SendBufferSizeOption is used by stack.(Stack*).Option/SetOption to 1437 // get/set the default, min and max send buffer sizes. 1438 type SendBufferSizeOption struct { 1439 // Min is the minimum size for send buffer. 1440 Min int 1441 1442 // Default is the default size for send buffer. 1443 Default int 1444 1445 // Max is the maximum size for send buffer. 1446 Max int 1447 } 1448 1449 // ReceiveBufferSizeOption is used by stack.(Stack*).Option/SetOption to 1450 // get/set the default, min and max receive buffer sizes. 1451 type ReceiveBufferSizeOption struct { 1452 // Min is the minimum size for send buffer. 1453 Min int 1454 1455 // Default is the default size for send buffer. 1456 Default int 1457 1458 // Max is the maximum size for send buffer. 1459 Max int 1460 } 1461 1462 // GetSendBufferLimits is used to get the send buffer size limits. 1463 type GetSendBufferLimits func(StackHandler) SendBufferSizeOption 1464 1465 // GetStackSendBufferLimits is used to get default, min and max send buffer size. 1466 func GetStackSendBufferLimits(so StackHandler) SendBufferSizeOption { 1467 var ss SendBufferSizeOption 1468 if err := so.Option(&ss); err != nil { 1469 panic(fmt.Sprintf("s.Option(%#v) = %s", ss, err)) 1470 } 1471 return ss 1472 } 1473 1474 // GetReceiveBufferLimits is used to get the send buffer size limits. 1475 type GetReceiveBufferLimits func(StackHandler) ReceiveBufferSizeOption 1476 1477 // GetStackReceiveBufferLimits is used to get default, min and max send buffer size. 
1478 func GetStackReceiveBufferLimits(so StackHandler) ReceiveBufferSizeOption { 1479 var ss ReceiveBufferSizeOption 1480 if err := so.Option(&ss); err != nil { 1481 panic(fmt.Sprintf("s.Option(%#v) = %s", ss, err)) 1482 } 1483 return ss 1484 } 1485 1486 // Route is a row in the routing table. It specifies through which NIC (and 1487 // gateway) sets of packets should be routed. A row is considered viable if the 1488 // masked target address matches the destination address in the row. 1489 type Route struct { 1490 // Destination must contain the target address for this row to be viable. 1491 Destination Subnet 1492 1493 // Gateway is the gateway to be used if this row is viable. 1494 Gateway Address 1495 1496 // NIC is the id of the nic to be used if this row is viable. 1497 NIC NICID 1498 } 1499 1500 // String implements the fmt.Stringer interface. 1501 func (r Route) String() string { 1502 var out strings.Builder 1503 _, _ = fmt.Fprintf(&out, "%s", r.Destination) 1504 if r.Gateway.length > 0 { 1505 _, _ = fmt.Fprintf(&out, " via %s", r.Gateway) 1506 } 1507 _, _ = fmt.Fprintf(&out, " nic %d", r.NIC) 1508 return out.String() 1509 } 1510 1511 // Equal returns true if the given Route is equal to this Route. 1512 func (r Route) Equal(to Route) bool { 1513 // NOTE: This relies on the fact that r.Destination == to.Destination 1514 return r.Destination.Equal(to.Destination) && r.Gateway == to.Gateway && r.NIC == to.NIC 1515 } 1516 1517 // TransportProtocolNumber is the number of a transport protocol. 1518 type TransportProtocolNumber uint32 1519 1520 // NetworkProtocolNumber is the EtherType of a network protocol in an Ethernet 1521 // frame. 1522 // 1523 // See: https://www.iana.org/assignments/ieee-802-numbers/ieee-802-numbers.xhtml 1524 type NetworkProtocolNumber uint32 1525 1526 // A StatCounter keeps track of a statistic. 
1527 // 1528 // +stateify savable 1529 type StatCounter struct { 1530 count atomicbitops.Uint64 1531 } 1532 1533 // Increment adds one to the counter. 1534 func (s *StatCounter) Increment() { 1535 s.IncrementBy(1) 1536 } 1537 1538 // Decrement minuses one to the counter. 1539 func (s *StatCounter) Decrement() { 1540 s.IncrementBy(^uint64(0)) 1541 } 1542 1543 // Value returns the current value of the counter. 1544 func (s *StatCounter) Value() uint64 { 1545 return s.count.Load() 1546 } 1547 1548 // IncrementBy increments the counter by v. 1549 func (s *StatCounter) IncrementBy(v uint64) { 1550 s.count.Add(v) 1551 } 1552 1553 func (s *StatCounter) String() string { 1554 return strconv.FormatUint(s.Value(), 10) 1555 } 1556 1557 // A MultiCounterStat keeps track of two counters at once. 1558 type MultiCounterStat struct { 1559 a *StatCounter 1560 b *StatCounter 1561 } 1562 1563 // Init sets both internal counters to point to a and b. 1564 func (m *MultiCounterStat) Init(a, b *StatCounter) { 1565 m.a = a 1566 m.b = b 1567 } 1568 1569 // Increment adds one to the counters. 1570 func (m *MultiCounterStat) Increment() { 1571 m.a.Increment() 1572 m.b.Increment() 1573 } 1574 1575 // IncrementBy increments the counters by v. 1576 func (m *MultiCounterStat) IncrementBy(v uint64) { 1577 m.a.IncrementBy(v) 1578 m.b.IncrementBy(v) 1579 } 1580 1581 // ICMPv4PacketStats enumerates counts for all ICMPv4 packet types. 1582 type ICMPv4PacketStats struct { 1583 // LINT.IfChange(ICMPv4PacketStats) 1584 1585 // EchoRequest is the number of ICMPv4 echo packets counted. 1586 EchoRequest *StatCounter 1587 1588 // EchoReply is the number of ICMPv4 echo reply packets counted. 1589 EchoReply *StatCounter 1590 1591 // DstUnreachable is the number of ICMPv4 destination unreachable packets 1592 // counted. 1593 DstUnreachable *StatCounter 1594 1595 // SrcQuench is the number of ICMPv4 source quench packets counted. 
1596 SrcQuench *StatCounter 1597 1598 // Redirect is the number of ICMPv4 redirect packets counted. 1599 Redirect *StatCounter 1600 1601 // TimeExceeded is the number of ICMPv4 time exceeded packets counted. 1602 TimeExceeded *StatCounter 1603 1604 // ParamProblem is the number of ICMPv4 parameter problem packets counted. 1605 ParamProblem *StatCounter 1606 1607 // Timestamp is the number of ICMPv4 timestamp packets counted. 1608 Timestamp *StatCounter 1609 1610 // TimestampReply is the number of ICMPv4 timestamp reply packets counted. 1611 TimestampReply *StatCounter 1612 1613 // InfoRequest is the number of ICMPv4 information request packets counted. 1614 InfoRequest *StatCounter 1615 1616 // InfoReply is the number of ICMPv4 information reply packets counted. 1617 InfoReply *StatCounter 1618 1619 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4PacketStats) 1620 } 1621 1622 // ICMPv4SentPacketStats collects outbound ICMPv4-specific stats. 1623 type ICMPv4SentPacketStats struct { 1624 // LINT.IfChange(ICMPv4SentPacketStats) 1625 1626 ICMPv4PacketStats 1627 1628 // Dropped is the number of ICMPv4 packets dropped due to link layer errors. 1629 Dropped *StatCounter 1630 1631 // RateLimited is the number of ICMPv4 packets dropped due to rate limit being 1632 // exceeded. 1633 RateLimited *StatCounter 1634 1635 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4SentPacketStats) 1636 } 1637 1638 // ICMPv4ReceivedPacketStats collects inbound ICMPv4-specific stats. 1639 type ICMPv4ReceivedPacketStats struct { 1640 // LINT.IfChange(ICMPv4ReceivedPacketStats) 1641 1642 ICMPv4PacketStats 1643 1644 // Invalid is the number of invalid ICMPv4 packets received. 1645 Invalid *StatCounter 1646 1647 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4ReceivedPacketStats) 1648 } 1649 1650 // ICMPv4Stats collects ICMPv4-specific stats. 
1651 type ICMPv4Stats struct { 1652 // LINT.IfChange(ICMPv4Stats) 1653 1654 // PacketsSent contains statistics about sent packets. 1655 PacketsSent ICMPv4SentPacketStats 1656 1657 // PacketsReceived contains statistics about received packets. 1658 PacketsReceived ICMPv4ReceivedPacketStats 1659 1660 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4Stats) 1661 } 1662 1663 // ICMPv6PacketStats enumerates counts for all ICMPv6 packet types. 1664 type ICMPv6PacketStats struct { 1665 // LINT.IfChange(ICMPv6PacketStats) 1666 1667 // EchoRequest is the number of ICMPv6 echo request packets counted. 1668 EchoRequest *StatCounter 1669 1670 // EchoReply is the number of ICMPv6 echo reply packets counted. 1671 EchoReply *StatCounter 1672 1673 // DstUnreachable is the number of ICMPv6 destination unreachable packets 1674 // counted. 1675 DstUnreachable *StatCounter 1676 1677 // PacketTooBig is the number of ICMPv6 packet too big packets counted. 1678 PacketTooBig *StatCounter 1679 1680 // TimeExceeded is the number of ICMPv6 time exceeded packets counted. 1681 TimeExceeded *StatCounter 1682 1683 // ParamProblem is the number of ICMPv6 parameter problem packets counted. 1684 ParamProblem *StatCounter 1685 1686 // RouterSolicit is the number of ICMPv6 router solicit packets counted. 1687 RouterSolicit *StatCounter 1688 1689 // RouterAdvert is the number of ICMPv6 router advert packets counted. 1690 RouterAdvert *StatCounter 1691 1692 // NeighborSolicit is the number of ICMPv6 neighbor solicit packets counted. 1693 NeighborSolicit *StatCounter 1694 1695 // NeighborAdvert is the number of ICMPv6 neighbor advert packets counted. 1696 NeighborAdvert *StatCounter 1697 1698 // RedirectMsg is the number of ICMPv6 redirect message packets counted. 1699 RedirectMsg *StatCounter 1700 1701 // MulticastListenerQuery is the number of Multicast Listener Query messages 1702 // counted. 
1703 MulticastListenerQuery *StatCounter 1704 1705 // MulticastListenerReport is the number of Multicast Listener Report messages 1706 // counted. 1707 MulticastListenerReport *StatCounter 1708 1709 // MulticastListenerReportV2 is the number of Multicast Listener Report 1710 // messages counted. 1711 MulticastListenerReportV2 *StatCounter 1712 1713 // MulticastListenerDone is the number of Multicast Listener Done messages 1714 // counted. 1715 MulticastListenerDone *StatCounter 1716 1717 // LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6PacketStats) 1718 } 1719 1720 // ICMPv6SentPacketStats collects outbound ICMPv6-specific stats. 1721 type ICMPv6SentPacketStats struct { 1722 // LINT.IfChange(ICMPv6SentPacketStats) 1723 1724 ICMPv6PacketStats 1725 1726 // Dropped is the number of ICMPv6 packets dropped due to link layer errors. 1727 Dropped *StatCounter 1728 1729 // RateLimited is the number of ICMPv6 packets dropped due to rate limit being 1730 // exceeded. 1731 RateLimited *StatCounter 1732 1733 // LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6SentPacketStats) 1734 } 1735 1736 // ICMPv6ReceivedPacketStats collects inbound ICMPv6-specific stats. 1737 type ICMPv6ReceivedPacketStats struct { 1738 // LINT.IfChange(ICMPv6ReceivedPacketStats) 1739 1740 ICMPv6PacketStats 1741 1742 // Unrecognized is the number of ICMPv6 packets received that the transport 1743 // layer does not know how to parse. 1744 Unrecognized *StatCounter 1745 1746 // Invalid is the number of invalid ICMPv6 packets received. 1747 Invalid *StatCounter 1748 1749 // RouterOnlyPacketsDroppedByHost is the number of ICMPv6 packets dropped due 1750 // to being router-specific packets. 1751 RouterOnlyPacketsDroppedByHost *StatCounter 1752 1753 // LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6ReceivedPacketStats) 1754 } 1755 1756 // ICMPv6Stats collects ICMPv6-specific stats. 
1757 type ICMPv6Stats struct { 1758 // LINT.IfChange(ICMPv6Stats) 1759 1760 // PacketsSent contains statistics about sent packets. 1761 PacketsSent ICMPv6SentPacketStats 1762 1763 // PacketsReceived contains statistics about received packets. 1764 PacketsReceived ICMPv6ReceivedPacketStats 1765 1766 // LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6Stats) 1767 } 1768 1769 // ICMPStats collects ICMP-specific stats (both v4 and v6). 1770 type ICMPStats struct { 1771 // V4 contains the ICMPv4-specifics stats. 1772 V4 ICMPv4Stats 1773 1774 // V6 contains the ICMPv4-specifics stats. 1775 V6 ICMPv6Stats 1776 } 1777 1778 // IGMPPacketStats enumerates counts for all IGMP packet types. 1779 type IGMPPacketStats struct { 1780 // LINT.IfChange(IGMPPacketStats) 1781 1782 // MembershipQuery is the number of Membership Query messages counted. 1783 MembershipQuery *StatCounter 1784 1785 // V1MembershipReport is the number of Version 1 Membership Report messages 1786 // counted. 1787 V1MembershipReport *StatCounter 1788 1789 // V2MembershipReport is the number of Version 2 Membership Report messages 1790 // counted. 1791 V2MembershipReport *StatCounter 1792 1793 // V3MembershipReport is the number of Version 3 Membership Report messages 1794 // counted. 1795 V3MembershipReport *StatCounter 1796 1797 // LeaveGroup is the number of Leave Group messages counted. 1798 LeaveGroup *StatCounter 1799 1800 // LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPPacketStats) 1801 } 1802 1803 // IGMPSentPacketStats collects outbound IGMP-specific stats. 1804 type IGMPSentPacketStats struct { 1805 // LINT.IfChange(IGMPSentPacketStats) 1806 1807 IGMPPacketStats 1808 1809 // Dropped is the number of IGMP packets dropped. 1810 Dropped *StatCounter 1811 1812 // LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPSentPacketStats) 1813 } 1814 1815 // IGMPReceivedPacketStats collects inbound IGMP-specific stats. 
1816 type IGMPReceivedPacketStats struct { 1817 // LINT.IfChange(IGMPReceivedPacketStats) 1818 1819 IGMPPacketStats 1820 1821 // Invalid is the number of invalid IGMP packets received. 1822 Invalid *StatCounter 1823 1824 // ChecksumErrors is the number of IGMP packets dropped due to bad checksums. 1825 ChecksumErrors *StatCounter 1826 1827 // Unrecognized is the number of unrecognized messages counted, these are 1828 // silently ignored for forward-compatibilty. 1829 Unrecognized *StatCounter 1830 1831 // LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPReceivedPacketStats) 1832 } 1833 1834 // IGMPStats collects IGMP-specific stats. 1835 type IGMPStats struct { 1836 // LINT.IfChange(IGMPStats) 1837 1838 // PacketsSent contains statistics about sent packets. 1839 PacketsSent IGMPSentPacketStats 1840 1841 // PacketsReceived contains statistics about received packets. 1842 PacketsReceived IGMPReceivedPacketStats 1843 1844 // LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPStats) 1845 } 1846 1847 // IPForwardingStats collects stats related to IP forwarding (both v4 and v6). 1848 type IPForwardingStats struct { 1849 // LINT.IfChange(IPForwardingStats) 1850 1851 // Unrouteable is the number of IP packets received which were dropped 1852 // because a route to their destination could not be constructed. 1853 Unrouteable *StatCounter 1854 1855 // ExhaustedTTL is the number of IP packets received which were dropped 1856 // because their TTL was exhausted. 1857 ExhaustedTTL *StatCounter 1858 1859 // InitializingSource is the number of IP packets which were dropped 1860 // because they contained a source address that may only be used on the local 1861 // network as part of initialization work. 1862 InitializingSource *StatCounter 1863 1864 // LinkLocalSource is the number of IP packets which were dropped 1865 // because they contained a link-local source address. 
1866 LinkLocalSource *StatCounter 1867 1868 // LinkLocalDestination is the number of IP packets which were dropped 1869 // because they contained a link-local destination address. 1870 LinkLocalDestination *StatCounter 1871 1872 // PacketTooBig is the number of IP packets which were dropped because they 1873 // were too big for the outgoing MTU. 1874 PacketTooBig *StatCounter 1875 1876 // HostUnreachable is the number of IP packets received which could not be 1877 // successfully forwarded due to an unresolvable next hop. 1878 HostUnreachable *StatCounter 1879 1880 // ExtensionHeaderProblem is the number of IP packets which were dropped 1881 // because of a problem encountered when processing an IPv6 extension 1882 // header. 1883 ExtensionHeaderProblem *StatCounter 1884 1885 // UnexpectedMulticastInputInterface is the number of multicast packets that 1886 // were received on an interface that did not match the corresponding route's 1887 // expected input interface. 1888 UnexpectedMulticastInputInterface *StatCounter 1889 1890 // UnknownOutputEndpoint is the number of packets that could not be forwarded 1891 // because the output endpoint could not be found. 1892 UnknownOutputEndpoint *StatCounter 1893 1894 // NoMulticastPendingQueueBufferSpace is the number of multicast packets that 1895 // were dropped due to insufficent buffer space in the pending packet queue. 1896 NoMulticastPendingQueueBufferSpace *StatCounter 1897 1898 // OutgoingDeviceNoBufferSpace is the number of packets that were dropped due 1899 // to insufficient space in the outgoing device. 1900 OutgoingDeviceNoBufferSpace *StatCounter 1901 1902 // Errors is the number of IP packets received which could not be 1903 // successfully forwarded. 1904 Errors *StatCounter 1905 1906 // LINT.ThenChange(network/internal/ip/stats.go:MultiCounterIPForwardingStats) 1907 } 1908 1909 // IPStats collects IP-specific stats (both v4 and v6). 
1910 type IPStats struct { 1911 // LINT.IfChange(IPStats) 1912 1913 // PacketsReceived is the number of IP packets received from the link layer. 1914 PacketsReceived *StatCounter 1915 1916 // ValidPacketsReceived is the number of valid IP packets that reached the IP 1917 // layer. 1918 ValidPacketsReceived *StatCounter 1919 1920 // DisabledPacketsReceived is the number of IP packets received from the link 1921 // layer when the IP layer is disabled. 1922 DisabledPacketsReceived *StatCounter 1923 1924 // InvalidDestinationAddressesReceived is the number of IP packets received 1925 // with an unknown or invalid destination address. 1926 InvalidDestinationAddressesReceived *StatCounter 1927 1928 // InvalidSourceAddressesReceived is the number of IP packets received with a 1929 // source address that should never have been received on the wire. 1930 InvalidSourceAddressesReceived *StatCounter 1931 1932 // PacketsDelivered is the number of incoming IP packets that are successfully 1933 // delivered to the transport layer. 1934 PacketsDelivered *StatCounter 1935 1936 // PacketsSent is the number of IP packets sent via WritePacket. 1937 PacketsSent *StatCounter 1938 1939 // OutgoingPacketErrors is the number of IP packets which failed to write to a 1940 // link-layer endpoint. 1941 OutgoingPacketErrors *StatCounter 1942 1943 // MalformedPacketsReceived is the number of IP Packets that were dropped due 1944 // to the IP packet header failing validation checks. 1945 MalformedPacketsReceived *StatCounter 1946 1947 // MalformedFragmentsReceived is the number of IP Fragments that were dropped 1948 // due to the fragment failing validation checks. 1949 MalformedFragmentsReceived *StatCounter 1950 1951 // IPTablesPreroutingDropped is the number of IP packets dropped in the 1952 // Prerouting chain. 1953 IPTablesPreroutingDropped *StatCounter 1954 1955 // IPTablesInputDropped is the number of IP packets dropped in the Input 1956 // chain. 
1957 IPTablesInputDropped *StatCounter 1958 1959 // IPTablesForwardDropped is the number of IP packets dropped in the Forward 1960 // chain. 1961 IPTablesForwardDropped *StatCounter 1962 1963 // IPTablesOutputDropped is the number of IP packets dropped in the Output 1964 // chain. 1965 IPTablesOutputDropped *StatCounter 1966 1967 // IPTablesPostroutingDropped is the number of IP packets dropped in the 1968 // Postrouting chain. 1969 IPTablesPostroutingDropped *StatCounter 1970 1971 // TODO(https://gvisor.dev/issues/5529): Move the IPv4-only option stats out 1972 // of IPStats. 1973 // OptionTimestampReceived is the number of Timestamp options seen. 1974 OptionTimestampReceived *StatCounter 1975 1976 // OptionRecordRouteReceived is the number of Record Route options seen. 1977 OptionRecordRouteReceived *StatCounter 1978 1979 // OptionRouterAlertReceived is the number of Router Alert options seen. 1980 OptionRouterAlertReceived *StatCounter 1981 1982 // OptionUnknownReceived is the number of unknown IP options seen. 1983 OptionUnknownReceived *StatCounter 1984 1985 // Forwarding collects stats related to IP forwarding. 1986 Forwarding IPForwardingStats 1987 1988 // LINT.ThenChange(network/internal/ip/stats.go:MultiCounterIPStats) 1989 } 1990 1991 // ARPStats collects ARP-specific stats. 1992 type ARPStats struct { 1993 // LINT.IfChange(ARPStats) 1994 1995 // PacketsReceived is the number of ARP packets received from the link layer. 1996 PacketsReceived *StatCounter 1997 1998 // DisabledPacketsReceived is the number of ARP packets received from the link 1999 // layer when the ARP layer is disabled. 2000 DisabledPacketsReceived *StatCounter 2001 2002 // MalformedPacketsReceived is the number of ARP packets that were dropped due 2003 // to being malformed. 2004 MalformedPacketsReceived *StatCounter 2005 2006 // RequestsReceived is the number of ARP requests received. 
2007 RequestsReceived *StatCounter 2008 2009 // RequestsReceivedUnknownTargetAddress is the number of ARP requests that 2010 // were targeted to an interface different from the one it was received on. 2011 RequestsReceivedUnknownTargetAddress *StatCounter 2012 2013 // OutgoingRequestInterfaceHasNoLocalAddressErrors is the number of failures 2014 // to send an ARP request because the interface has no network address 2015 // assigned to it. 2016 OutgoingRequestInterfaceHasNoLocalAddressErrors *StatCounter 2017 2018 // OutgoingRequestBadLocalAddressErrors is the number of failures to send an 2019 // ARP request with a bad local address. 2020 OutgoingRequestBadLocalAddressErrors *StatCounter 2021 2022 // OutgoingRequestsDropped is the number of ARP requests which failed to write 2023 // to a link-layer endpoint. 2024 OutgoingRequestsDropped *StatCounter 2025 2026 // OutgoingRequestSent is the number of ARP requests successfully written to a 2027 // link-layer endpoint. 2028 OutgoingRequestsSent *StatCounter 2029 2030 // RepliesReceived is the number of ARP replies received. 2031 RepliesReceived *StatCounter 2032 2033 // OutgoingRepliesDropped is the number of ARP replies which failed to write 2034 // to a link-layer endpoint. 2035 OutgoingRepliesDropped *StatCounter 2036 2037 // OutgoingRepliesSent is the number of ARP replies successfully written to a 2038 // link-layer endpoint. 2039 OutgoingRepliesSent *StatCounter 2040 2041 // LINT.ThenChange(network/arp/stats.go:multiCounterARPStats) 2042 } 2043 2044 // TCPStats collects TCP-specific stats. 2045 type TCPStats struct { 2046 // ActiveConnectionOpenings is the number of connections opened 2047 // successfully via Connect. 2048 ActiveConnectionOpenings *StatCounter 2049 2050 // PassiveConnectionOpenings is the number of connections opened 2051 // successfully via Listen. 
2052 PassiveConnectionOpenings *StatCounter 2053 2054 // CurrentEstablished is the number of TCP connections for which the 2055 // current state is ESTABLISHED. 2056 CurrentEstablished *StatCounter 2057 2058 // CurrentConnected is the number of TCP connections that 2059 // are in connected state. 2060 CurrentConnected *StatCounter 2061 2062 // EstablishedResets is the number of times TCP connections have made 2063 // a direct transition to the CLOSED state from either the 2064 // ESTABLISHED state or the CLOSE-WAIT state. 2065 EstablishedResets *StatCounter 2066 2067 // EstablishedClosed is the number of times established TCP connections 2068 // made a transition to CLOSED state. 2069 EstablishedClosed *StatCounter 2070 2071 // EstablishedTimedout is the number of times an established connection 2072 // was reset because of keep-alive time out. 2073 EstablishedTimedout *StatCounter 2074 2075 // ListenOverflowSynDrop is the number of times the listen queue overflowed 2076 // and a SYN was dropped. 2077 ListenOverflowSynDrop *StatCounter 2078 2079 // ListenOverflowAckDrop is the number of times the final ACK 2080 // in the handshake was dropped due to overflow. 2081 ListenOverflowAckDrop *StatCounter 2082 2083 // ListenOverflowCookieSent is the number of times a SYN cookie was sent. 2084 ListenOverflowSynCookieSent *StatCounter 2085 2086 // ListenOverflowSynCookieRcvd is the number of times a valid SYN 2087 // cookie was received. 2088 ListenOverflowSynCookieRcvd *StatCounter 2089 2090 // ListenOverflowInvalidSynCookieRcvd is the number of times an invalid SYN cookie 2091 // was received. 2092 ListenOverflowInvalidSynCookieRcvd *StatCounter 2093 2094 // FailedConnectionAttempts is the number of calls to Connect or Listen 2095 // (active and passive openings, respectively) that end in an error. 2096 FailedConnectionAttempts *StatCounter 2097 2098 // ValidSegmentsReceived is the number of TCP segments received that 2099 // the transport layer successfully parsed. 
2100 ValidSegmentsReceived *StatCounter 2101 2102 // InvalidSegmentsReceived is the number of TCP segments received that 2103 // the transport layer could not parse. 2104 InvalidSegmentsReceived *StatCounter 2105 2106 // SegmentsSent is the number of TCP segments sent. 2107 SegmentsSent *StatCounter 2108 2109 // SegmentSendErrors is the number of TCP segments failed to be sent. 2110 SegmentSendErrors *StatCounter 2111 2112 // ResetsSent is the number of TCP resets sent. 2113 ResetsSent *StatCounter 2114 2115 // ResetsReceived is the number of TCP resets received. 2116 ResetsReceived *StatCounter 2117 2118 // Retransmits is the number of TCP segments retransmitted. 2119 Retransmits *StatCounter 2120 2121 // FastRecovery is the number of times Fast Recovery was used to 2122 // recover from packet loss. 2123 FastRecovery *StatCounter 2124 2125 // SACKRecovery is the number of times SACK Recovery was used to 2126 // recover from packet loss. 2127 SACKRecovery *StatCounter 2128 2129 // TLPRecovery is the number of times recovery was accomplished by the tail 2130 // loss probe. 2131 TLPRecovery *StatCounter 2132 2133 // SlowStartRetransmits is the number of segments retransmitted in slow 2134 // start. 2135 SlowStartRetransmits *StatCounter 2136 2137 // FastRetransmit is the number of segments retransmitted in fast 2138 // recovery. 2139 FastRetransmit *StatCounter 2140 2141 // Timeouts is the number of times the RTO expired. 2142 Timeouts *StatCounter 2143 2144 // ChecksumErrors is the number of segments dropped due to bad checksums. 2145 ChecksumErrors *StatCounter 2146 2147 // FailedPortReservations is the number of times TCP failed to reserve 2148 // a port. 2149 FailedPortReservations *StatCounter 2150 2151 // SegmentsAckedWithDSACK is the number of segments acknowledged with 2152 // DSACK. 2153 SegmentsAckedWithDSACK *StatCounter 2154 2155 // SpuriousRecovery is the number of times the connection entered loss 2156 // recovery spuriously. 
2157 SpuriousRecovery *StatCounter 2158 2159 // SpuriousRTORecovery is the number of spurious RTOs. 2160 SpuriousRTORecovery *StatCounter 2161 2162 // ForwardMaxInFlightDrop is the number of connection requests that are 2163 // dropped due to exceeding the maximum number of in-flight connection 2164 // requests. 2165 ForwardMaxInFlightDrop *StatCounter 2166 } 2167 2168 // UDPStats collects UDP-specific stats. 2169 type UDPStats struct { 2170 // PacketsReceived is the number of UDP datagrams received via 2171 // HandlePacket. 2172 PacketsReceived *StatCounter 2173 2174 // UnknownPortErrors is the number of incoming UDP datagrams dropped 2175 // because they did not have a known destination port. 2176 UnknownPortErrors *StatCounter 2177 2178 // ReceiveBufferErrors is the number of incoming UDP datagrams dropped 2179 // due to the receiving buffer being in an invalid state. 2180 ReceiveBufferErrors *StatCounter 2181 2182 // MalformedPacketsReceived is the number of incoming UDP datagrams 2183 // dropped due to the UDP header being in a malformed state. 2184 MalformedPacketsReceived *StatCounter 2185 2186 // PacketsSent is the number of UDP datagrams sent via sendUDP. 2187 PacketsSent *StatCounter 2188 2189 // PacketSendErrors is the number of datagrams failed to be sent. 2190 PacketSendErrors *StatCounter 2191 2192 // ChecksumErrors is the number of datagrams dropped due to bad checksums. 2193 ChecksumErrors *StatCounter 2194 } 2195 2196 // NICNeighborStats holds metrics for the neighbor table. 2197 type NICNeighborStats struct { 2198 // LINT.IfChange(NICNeighborStats) 2199 2200 // UnreachableEntryLookups counts the number of lookups performed on an 2201 // entry in Unreachable state. 2202 UnreachableEntryLookups *StatCounter 2203 2204 // DroppedConfirmationForNoninitiatedNeighbor counts the number of neighbor 2205 // responses that were dropped because they didn't match an entry in the 2206 // cache. 
2207 DroppedConfirmationForNoninitiatedNeighbor *StatCounter 2208 2209 // DroppedInvalidLinkAddressConfirmations counts the number of neighbor 2210 // responses that were ignored because they had an invalid source link-layer 2211 // address. 2212 DroppedInvalidLinkAddressConfirmations *StatCounter 2213 2214 // LINT.ThenChange(stack/nic_stats.go:multiCounterNICNeighborStats) 2215 } 2216 2217 // NICPacketStats holds basic packet statistics. 2218 type NICPacketStats struct { 2219 // LINT.IfChange(NICPacketStats) 2220 2221 // Packets is the number of packets counted. 2222 Packets *StatCounter 2223 2224 // Bytes is the number of bytes counted. 2225 Bytes *StatCounter 2226 2227 // LINT.ThenChange(stack/nic_stats.go:multiCounterNICPacketStats) 2228 } 2229 2230 // IntegralStatCounterMap holds a map associating integral keys with 2231 // StatCounters. 2232 type IntegralStatCounterMap struct { 2233 mu sync.RWMutex 2234 // +checklocks:mu 2235 counterMap map[uint64]*StatCounter 2236 } 2237 2238 // Keys returns all keys present in the map. 2239 func (m *IntegralStatCounterMap) Keys() []uint64 { 2240 m.mu.RLock() 2241 defer m.mu.RUnlock() 2242 var keys []uint64 2243 for k := range m.counterMap { 2244 keys = append(keys, k) 2245 } 2246 return keys 2247 } 2248 2249 // Get returns the counter mapped by the provided key. 2250 func (m *IntegralStatCounterMap) Get(key uint64) (*StatCounter, bool) { 2251 m.mu.RLock() 2252 defer m.mu.RUnlock() 2253 counter, ok := m.counterMap[key] 2254 return counter, ok 2255 } 2256 2257 // Init initializes the map. 2258 func (m *IntegralStatCounterMap) Init() { 2259 m.mu.Lock() 2260 defer m.mu.Unlock() 2261 m.counterMap = make(map[uint64]*StatCounter) 2262 } 2263 2264 // Increment increments the counter associated with the provided key. 
2265 func (m *IntegralStatCounterMap) Increment(key uint64) { 2266 m.mu.RLock() 2267 counter, ok := m.counterMap[key] 2268 m.mu.RUnlock() 2269 2270 if !ok { 2271 m.mu.Lock() 2272 counter, ok = m.counterMap[key] 2273 if !ok { 2274 counter = new(StatCounter) 2275 m.counterMap[key] = counter 2276 } 2277 m.mu.Unlock() 2278 } 2279 counter.Increment() 2280 } 2281 2282 // A MultiIntegralStatCounterMap keeps track of two integral counter maps at 2283 // once. 2284 type MultiIntegralStatCounterMap struct { 2285 a *IntegralStatCounterMap 2286 b *IntegralStatCounterMap 2287 } 2288 2289 // Init sets the internal integral counter maps to point to a and b. 2290 func (m *MultiIntegralStatCounterMap) Init(a, b *IntegralStatCounterMap) { 2291 m.a = a 2292 m.b = b 2293 } 2294 2295 // Increment increments the counter in each map corresponding to the 2296 // provided key. 2297 func (m *MultiIntegralStatCounterMap) Increment(key uint64) { 2298 m.a.Increment(key) 2299 m.b.Increment(key) 2300 } 2301 2302 // NICStats holds NIC statistics. 2303 type NICStats struct { 2304 // LINT.IfChange(NICStats) 2305 2306 // UnknownL3ProtocolRcvdPacketCounts records the number of packets recieved 2307 // for each unknown or unsupported netowrk protocol number. 2308 UnknownL3ProtocolRcvdPacketCounts *IntegralStatCounterMap 2309 2310 // UnknownL4ProtocolRcvdPacketCounts records the number of packets recieved 2311 // for each unknown or unsupported transport protocol number. 2312 UnknownL4ProtocolRcvdPacketCounts *IntegralStatCounterMap 2313 2314 // MalformedL4RcvdPackets is the number of packets received by a NIC that 2315 // could not be delivered to a transport endpoint because the L4 header could 2316 // not be parsed. 2317 MalformedL4RcvdPackets *StatCounter 2318 2319 // Tx contains statistics about transmitted packets. 2320 Tx NICPacketStats 2321 2322 // TxPacketsDroppedNoBufferSpace is the number of packets dropepd due to the 2323 // NIC not having enough buffer space to send the packet. 
2324 // 2325 // Packets may be dropped with a no buffer space error when the device TX 2326 // queue is full. 2327 TxPacketsDroppedNoBufferSpace *StatCounter 2328 2329 // Rx contains statistics about received packets. 2330 Rx NICPacketStats 2331 2332 // DisabledRx contains statistics about received packets on disabled NICs. 2333 DisabledRx NICPacketStats 2334 2335 // Neighbor contains statistics about neighbor entries. 2336 Neighbor NICNeighborStats 2337 2338 // LINT.ThenChange(stack/nic_stats.go:multiCounterNICStats) 2339 } 2340 2341 // FillIn returns a copy of s with nil fields initialized to new StatCounters. 2342 func (s NICStats) FillIn() NICStats { 2343 InitStatCounters(reflect.ValueOf(&s).Elem()) 2344 return s 2345 } 2346 2347 // Stats holds statistics about the networking stack. 2348 type Stats struct { 2349 // TODO(https://gvisor.dev/issues/5986): Make the DroppedPackets stat less 2350 // ambiguous. 2351 2352 // DroppedPackets is the number of packets dropped at the transport layer. 2353 DroppedPackets *StatCounter 2354 2355 // NICs is an aggregation of every NIC's statistics. These should not be 2356 // incremented using this field, but using the relevant NIC multicounters. 2357 NICs NICStats 2358 2359 // ICMP is an aggregation of every NetworkEndpoint's ICMP statistics (both v4 2360 // and v6). These should not be incremented using this field, but using the 2361 // relevant NetworkEndpoint ICMP multicounters. 2362 ICMP ICMPStats 2363 2364 // IGMP is an aggregation of every NetworkEndpoint's IGMP statistics. These 2365 // should not be incremented using this field, but using the relevant 2366 // NetworkEndpoint IGMP multicounters. 2367 IGMP IGMPStats 2368 2369 // IP is an aggregation of every NetworkEndpoint's IP statistics. These should 2370 // not be incremented using this field, but using the relevant NetworkEndpoint 2371 // IP multicounters. 2372 IP IPStats 2373 2374 // ARP is an aggregation of every NetworkEndpoint's ARP statistics. 
These 2375 // should not be incremented using this field, but using the relevant 2376 // NetworkEndpoint ARP multicounters. 2377 ARP ARPStats 2378 2379 // TCP holds TCP-specific stats. 2380 TCP TCPStats 2381 2382 // UDP holds UDP-specific stats. 2383 UDP UDPStats 2384 } 2385 2386 // ReceiveErrors collects packet receive errors within transport endpoint. 2387 // 2388 // +stateify savable 2389 type ReceiveErrors struct { 2390 // ReceiveBufferOverflow is the number of received packets dropped 2391 // due to the receive buffer being full. 2392 ReceiveBufferOverflow StatCounter 2393 2394 // MalformedPacketsReceived is the number of incoming packets 2395 // dropped due to the packet header being in a malformed state. 2396 MalformedPacketsReceived StatCounter 2397 2398 // ClosedReceiver is the number of received packets dropped because 2399 // of receiving endpoint state being closed. 2400 ClosedReceiver StatCounter 2401 2402 // ChecksumErrors is the number of packets dropped due to bad checksums. 2403 ChecksumErrors StatCounter 2404 } 2405 2406 // SendErrors collects packet send errors within the transport layer for an 2407 // endpoint. 2408 // 2409 // +stateify savable 2410 type SendErrors struct { 2411 // SendToNetworkFailed is the number of packets failed to be written to 2412 // the network endpoint. 2413 SendToNetworkFailed StatCounter 2414 2415 // NoRoute is the number of times we failed to resolve IP route. 2416 NoRoute StatCounter 2417 } 2418 2419 // ReadErrors collects segment read errors from an endpoint read call. 2420 // 2421 // +stateify savable 2422 type ReadErrors struct { 2423 // ReadClosed is the number of received packet drops because the endpoint 2424 // was shutdown for read. 2425 ReadClosed StatCounter 2426 2427 // InvalidEndpointState is the number of times we found the endpoint state 2428 // to be unexpected. 
2429 InvalidEndpointState StatCounter 2430 2431 // NotConnected is the number of times we tried to read but found that the 2432 // endpoint was not connected. 2433 NotConnected StatCounter 2434 } 2435 2436 // WriteErrors collects packet write errors from an endpoint write call. 2437 // 2438 // +stateify savable 2439 type WriteErrors struct { 2440 // WriteClosed is the number of packet drops because the endpoint 2441 // was shutdown for write. 2442 WriteClosed StatCounter 2443 2444 // InvalidEndpointState is the number of times we found the endpoint state 2445 // to be unexpected. 2446 InvalidEndpointState StatCounter 2447 2448 // InvalidArgs is the number of times invalid input arguments were 2449 // provided for endpoint Write call. 2450 InvalidArgs StatCounter 2451 } 2452 2453 // TransportEndpointStats collects statistics about the endpoint. 2454 // 2455 // +stateify savable 2456 type TransportEndpointStats struct { 2457 // PacketsReceived is the number of successful packet receives. 2458 PacketsReceived StatCounter 2459 2460 // PacketsSent is the number of successful packet sends. 2461 PacketsSent StatCounter 2462 2463 // ReceiveErrors collects packet receive errors within transport layer. 2464 ReceiveErrors ReceiveErrors 2465 2466 // ReadErrors collects packet read errors from an endpoint read call. 2467 ReadErrors ReadErrors 2468 2469 // SendErrors collects packet send errors within the transport layer. 2470 SendErrors SendErrors 2471 2472 // WriteErrors collects packet write errors from an endpoint write call. 2473 WriteErrors WriteErrors 2474 } 2475 2476 // IsEndpointStats is an empty method to implement the tcpip.EndpointStats 2477 // marker interface. 2478 func (*TransportEndpointStats) IsEndpointStats() {} 2479 2480 // InitStatCounters initializes v's fields with nil StatCounter fields to new 2481 // StatCounters. 
2482 func InitStatCounters(v reflect.Value) { 2483 for i := 0; i < v.NumField(); i++ { 2484 v := v.Field(i) 2485 if s, ok := v.Addr().Interface().(**StatCounter); ok { 2486 if *s == nil { 2487 *s = new(StatCounter) 2488 } 2489 } else if s, ok := v.Addr().Interface().(**IntegralStatCounterMap); ok { 2490 if *s == nil { 2491 *s = new(IntegralStatCounterMap) 2492 (*s).Init() 2493 } 2494 } else { 2495 InitStatCounters(v) 2496 } 2497 } 2498 } 2499 2500 // FillIn returns a copy of s with nil fields initialized to new StatCounters. 2501 func (s Stats) FillIn() Stats { 2502 InitStatCounters(reflect.ValueOf(&s).Elem()) 2503 return s 2504 } 2505 2506 // Clone clones a copy of the TransportEndpointStats into dst by atomically 2507 // reading each field. 2508 func (src *TransportEndpointStats) Clone(dst *TransportEndpointStats) { 2509 clone(reflect.ValueOf(dst).Elem(), reflect.ValueOf(src).Elem()) 2510 } 2511 2512 func clone(dst reflect.Value, src reflect.Value) { 2513 for i := 0; i < dst.NumField(); i++ { 2514 d := dst.Field(i) 2515 s := src.Field(i) 2516 if c, ok := s.Addr().Interface().(*StatCounter); ok { 2517 d.Addr().Interface().(*StatCounter).IncrementBy(c.Value()) 2518 } else { 2519 clone(d, s) 2520 } 2521 } 2522 } 2523 2524 // String implements the fmt.Stringer interface. 2525 func (a Address) String() string { 2526 switch l := a.Len(); l { 2527 case 4: 2528 return fmt.Sprintf("%d.%d.%d.%d", int(a.addr[0]), int(a.addr[1]), int(a.addr[2]), int(a.addr[3])) 2529 case 16: 2530 // Find the longest subsequence of hexadecimal zeros. 
2531 start, end := -1, -1 2532 for i := 0; i < a.Len(); i += 2 { 2533 j := i 2534 for j < a.Len() && a.addr[j] == 0 && a.addr[j+1] == 0 { 2535 j += 2 2536 } 2537 if j > i+2 && j-i > end-start { 2538 start, end = i, j 2539 } 2540 } 2541 2542 var b strings.Builder 2543 for i := 0; i < a.Len(); i += 2 { 2544 if i == start { 2545 b.WriteString("::") 2546 i = end 2547 if end >= a.Len() { 2548 break 2549 } 2550 } else if i > 0 { 2551 b.WriteByte(':') 2552 } 2553 v := uint16(a.addr[i+0])<<8 | uint16(a.addr[i+1]) 2554 if v == 0 { 2555 b.WriteByte('0') 2556 } else { 2557 const digits = "0123456789abcdef" 2558 for i := uint(3); i < 4; i-- { 2559 if v := v >> (i * 4); v != 0 { 2560 b.WriteByte(digits[v&0xf]) 2561 } 2562 } 2563 } 2564 } 2565 return b.String() 2566 default: 2567 return fmt.Sprintf("%x", a.addr[:l]) 2568 } 2569 } 2570 2571 // To4 converts the IPv4 address to a 4-byte representation. 2572 // If the address is not an IPv4 address, To4 returns the empty Address. 2573 func (a Address) To4() Address { 2574 const ( 2575 ipv4len = 4 2576 ipv6len = 16 2577 ) 2578 if a.Len() == ipv4len { 2579 return a 2580 } 2581 if a.Len() == ipv6len && 2582 isZeros(a.addr[:10]) && 2583 a.addr[10] == 0xff && 2584 a.addr[11] == 0xff { 2585 return AddrFrom4Slice(a.addr[12:16]) 2586 } 2587 return Address{} 2588 } 2589 2590 // isZeros reports whether addr is all zeros. 2591 func isZeros(addr []byte) bool { 2592 for _, b := range addr { 2593 if b != 0 { 2594 return false 2595 } 2596 } 2597 return true 2598 } 2599 2600 // LinkAddress is a byte slice cast as a string that represents a link address. 2601 // It is typically a 6-byte MAC address. 2602 type LinkAddress string 2603 2604 // String implements the fmt.Stringer interface. 
2605 func (a LinkAddress) String() string { 2606 switch len(a) { 2607 case 6: 2608 return fmt.Sprintf("%02x:%02x:%02x:%02x:%02x:%02x", a[0], a[1], a[2], a[3], a[4], a[5]) 2609 default: 2610 return fmt.Sprintf("%x", []byte(a)) 2611 } 2612 } 2613 2614 // ParseMACAddress parses an IEEE 802 address. 2615 // 2616 // It must be in the format aa:bb:cc:dd:ee:ff or aa-bb-cc-dd-ee-ff. 2617 func ParseMACAddress(s string) (LinkAddress, error) { 2618 parts := strings.FieldsFunc(s, func(c rune) bool { 2619 return c == ':' || c == '-' 2620 }) 2621 if len(parts) != 6 { 2622 return "", fmt.Errorf("inconsistent parts: %s", s) 2623 } 2624 addr := make([]byte, 0, len(parts)) 2625 for _, part := range parts { 2626 u, err := strconv.ParseUint(part, 16, 8) 2627 if err != nil { 2628 return "", fmt.Errorf("invalid hex digits: %s", s) 2629 } 2630 addr = append(addr, byte(u)) 2631 } 2632 return LinkAddress(addr), nil 2633 } 2634 2635 // AddressWithPrefix is an address with its subnet prefix length. 2636 // 2637 // +stateify savable 2638 type AddressWithPrefix struct { 2639 // Address is a network address. 2640 Address Address 2641 2642 // PrefixLen is the subnet prefix length. 2643 PrefixLen int 2644 } 2645 2646 // String implements the fmt.Stringer interface. 2647 func (a AddressWithPrefix) String() string { 2648 return fmt.Sprintf("%s/%d", a.Address, a.PrefixLen) 2649 } 2650 2651 // Subnet converts the address and prefix into a Subnet value and returns it. 
2652 func (a AddressWithPrefix) Subnet() Subnet { 2653 addrLen := a.Address.length 2654 if a.PrefixLen <= 0 { 2655 return Subnet{ 2656 address: AddrFromSlice(bytes.Repeat([]byte{0}, addrLen)), 2657 mask: MaskFromBytes(bytes.Repeat([]byte{0}, addrLen)), 2658 } 2659 } 2660 if a.PrefixLen >= addrLen*8 { 2661 return Subnet{ 2662 address: a.Address, 2663 mask: MaskFromBytes(bytes.Repeat([]byte{0xff}, addrLen)), 2664 } 2665 } 2666 2667 sa := make([]byte, addrLen) 2668 sm := make([]byte, addrLen) 2669 n := uint(a.PrefixLen) 2670 for i := 0; i < addrLen; i++ { 2671 if n >= 8 { 2672 sa[i] = a.Address.addr[i] 2673 sm[i] = 0xff 2674 n -= 8 2675 continue 2676 } 2677 sm[i] = ^byte(0xff >> n) 2678 sa[i] = a.Address.addr[i] & sm[i] 2679 n = 0 2680 } 2681 2682 // For extra caution, call NewSubnet rather than directly creating the Subnet 2683 // value. If that fails it indicates a serious bug in this code, so panic is 2684 // in order. 2685 s, err := NewSubnet(AddrFromSlice(sa), MaskFromBytes(sm)) 2686 if err != nil { 2687 panic("invalid subnet: " + err.Error()) 2688 } 2689 return s 2690 } 2691 2692 // ProtocolAddress is an address and the network protocol it is associated 2693 // with. 2694 type ProtocolAddress struct { 2695 // Protocol is the protocol of the address. 2696 Protocol NetworkProtocolNumber 2697 2698 // AddressWithPrefix is a network address with its subnet prefix length. 2699 AddressWithPrefix AddressWithPrefix 2700 } 2701 2702 var ( 2703 // danglingEndpointsMu protects access to danglingEndpoints. 2704 danglingEndpointsMu sync.Mutex 2705 2706 // danglingEndpoints tracks all dangling endpoints no longer owned by the app. 2707 danglingEndpoints = make(map[Endpoint]struct{}) 2708 ) 2709 2710 // GetDanglingEndpoints returns all dangling endpoints. 
2711 func GetDanglingEndpoints() []Endpoint { 2712 danglingEndpointsMu.Lock() 2713 es := make([]Endpoint, 0, len(danglingEndpoints)) 2714 for e := range danglingEndpoints { 2715 es = append(es, e) 2716 } 2717 danglingEndpointsMu.Unlock() 2718 return es 2719 } 2720 2721 // ReleaseDanglingEndpoints clears out all all reference counted objects held by 2722 // dangling endpoints. 2723 func ReleaseDanglingEndpoints() { 2724 // Get the dangling endpoints first to avoid locking around Release(), which 2725 // can cause a lock inversion with endpoint.mu and danglingEndpointsMu. 2726 // Calling Release on a dangling endpoint that has been deleted is a noop. 2727 eps := GetDanglingEndpoints() 2728 for _, ep := range eps { 2729 ep.Abort() 2730 } 2731 } 2732 2733 // AddDanglingEndpoint adds a dangling endpoint. 2734 func AddDanglingEndpoint(e Endpoint) { 2735 danglingEndpointsMu.Lock() 2736 danglingEndpoints[e] = struct{}{} 2737 danglingEndpointsMu.Unlock() 2738 } 2739 2740 // DeleteDanglingEndpoint removes a dangling endpoint. 2741 func DeleteDanglingEndpoint(e Endpoint) { 2742 danglingEndpointsMu.Lock() 2743 delete(danglingEndpoints, e) 2744 danglingEndpointsMu.Unlock() 2745 } 2746 2747 // AsyncLoading is the global barrier for asynchronous endpoint loading 2748 // activities. 2749 var AsyncLoading sync.WaitGroup