github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/tcpip/tcpip.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package tcpip provides the interfaces and related types that users of the 16 // tcpip stack will use in order to create endpoints used to send and receive 17 // data over the network stack. 18 // 19 // The starting point is the creation and configuration of a stack. A stack can 20 // be created by calling the New() function of the tcpip/stack/stack package; 21 // configuring a stack involves creating NICs (via calls to Stack.CreateNIC()), 22 // adding network addresses (via calls to Stack.AddProtocolAddress()), and 23 // setting a route table (via a call to Stack.SetRouteTable()). 24 // 25 // Once a stack is configured, endpoints can be created by calling 26 // Stack.NewEndpoint(). Such endpoints can be used to send/receive data, connect 27 // to peers, listen for connections, accept connections, etc., depending on the 28 // transport protocol selected. 29 package tcpip 30 31 import ( 32 "bytes" 33 "errors" 34 "fmt" 35 "io" 36 "math" 37 "math/bits" 38 "reflect" 39 "strconv" 40 "strings" 41 "time" 42 43 "github.com/nicocha30/gvisor-ligolo/pkg/atomicbitops" 44 "github.com/nicocha30/gvisor-ligolo/pkg/sync" 45 "github.com/nicocha30/gvisor-ligolo/pkg/waiter" 46 ) 47 48 // Using the header package here would cause an import cycle. 
const (
	ipv4AddressSize    = 4
	ipv4ProtocolNumber = 0x0800
	ipv6AddressSize    = 16
	ipv6ProtocolNumber = 0x86dd
)

// Sentinel errors describing why an address/mask pair does not form a valid
// Subnet.
var (
	errSubnetLengthMismatch = errors.New("subnet length of address and mask differ")
	errSubnetAddressMasked  = errors.New("subnet address has bits set outside the mask")
)

// ErrSaveRejection wraps the reason a save was refused because of networking
// state that cannot be saved. It is only used by save logic.
type ErrSaveRejection struct {
	// Err is the underlying reason for the rejection.
	Err error
}

// Error implements the error interface by prefixing the wrapped error's
// message with a fixed description of the rejection.
func (e *ErrSaveRejection) Error() string {
	reason := e.Err.Error()
	return "save rejected due to unsupported networking state: " + reason
}

// MonotonicTime is a reading of a monotonic clock, stored as a nanosecond
// count.
//
// +stateify savable
type MonotonicTime struct {
	nanoseconds int64
}

// String implements fmt.Stringer; it renders the raw nanosecond count in
// base 10.
func (mt MonotonicTime) String() string {
	const base = 10
	return strconv.FormatInt(mt.nanoseconds, base)
}

// MonotonicTimeInfinite returns the largest representable monotonic reading,
// i.e. a timestamp as far in the future as possible.
func MonotonicTimeInfinite() MonotonicTime {
	var farthest MonotonicTime
	farthest.nanoseconds = math.MaxInt64
	return farthest
}

// Before reports whether mt is strictly earlier than u.
func (mt MonotonicTime) Before(u MonotonicTime) bool {
	return u.nanoseconds > mt.nanoseconds
}

// After reports whether mt is strictly later than u.
func (mt MonotonicTime) After(u MonotonicTime) bool {
	return u.nanoseconds < mt.nanoseconds
}

// Add returns the reading mt+d.
func (mt MonotonicTime) Add(d time.Duration) MonotonicTime {
	// Route the arithmetic through time.Time so that it follows the
	// standard library's Add/Sub behavior.
	epoch := time.Unix(0, 0)
	shifted := time.Unix(0, mt.nanoseconds).Add(d)
	return MonotonicTime{nanoseconds: shifted.Sub(epoch).Nanoseconds()}
}

// Sub returns the duration mt-u. If the result exceeds the maximum (or
// minimum) value that can be stored in a Duration, the maximum (or minimum)
// duration is returned. To compute t-d for a duration d, use t.Add(-d).
func (mt MonotonicTime) Sub(u MonotonicTime) time.Duration {
	end := time.Unix(0, mt.nanoseconds)
	start := time.Unix(0, u.nanoseconds)
	return end.Sub(start)
}

// A Clock provides the current time and schedules work for execution.
//
// Times returned by a Clock should always be used for application-visible
// time. Only monotonic times should be used for netstack internal timekeeping.
type Clock interface {
	// Now returns the current local time.
	Now() time.Time

	// NowMonotonic returns the current monotonic clock reading.
	NowMonotonic() MonotonicTime

	// AfterFunc waits for the duration to elapse and then calls f in its
	// own goroutine. The returned Timer can cancel the call through its
	// Stop method.
	AfterFunc(d time.Duration, f func()) Timer
}

// Timer represents a single event. A Timer must be created with
// Clock.AfterFunc.
type Timer interface {
	// Stop prevents the Timer from firing. It returns true if the call
	// stops the timer, false if the timer has already expired or been
	// stopped.
	//
	// If Stop returns false, then the timer has already expired and the
	// function f of Clock.AfterFunc(d, f) has been started in its own
	// goroutine; Stop does not wait for f to complete before returning. If
	// the caller needs to know whether f is completed, it must coordinate
	// with f explicitly.
	Stop() bool

	// Reset changes the timer to expire after duration d.
	//
	// Reset should be invoked only on stopped or expired timers. If the
	// timer is known to have expired, Reset can be used directly.
	// Otherwise, the caller must coordinate with the function f of
	// Clock.AfterFunc(d, f).
	Reset(d time.Duration)
}

// Address holds the address of a network node: up to 16 bytes of storage plus
// the number of leading bytes that are in use.
//
// +stateify savable
type Address struct {
	addr   [16]byte
	length int
}

// AddrFrom4 builds a 4-byte Address from addr.
func AddrFrom4(addr [4]byte) Address {
	var out Address
	out.length = len(addr)
	// The destination is larger than the source, so all 4 bytes are copied.
	copy(out.addr[:], addr[:])
	return out
}

// AddrFrom4Slice builds a 4-byte Address from addr. It panics if
// len(addr) != 4.
func AddrFrom4Slice(addr []byte) Address {
	if len(addr) != 4 {
		panic(fmt.Sprintf("bad address length for address %v", addr))
	}
	var fixed [4]byte
	copy(fixed[:], addr)
	return AddrFrom4(fixed)
}

// AddrFrom16 builds a 16-byte Address from addr.
func AddrFrom16(addr [16]byte) Address {
	var out Address
	out.length = len(addr)
	// Source and destination have the same size, so all 16 bytes are copied.
	copy(out.addr[:], addr[:])
	return out
}

// AddrFrom16Slice builds a 16-byte Address from addr. It panics if
// len(addr) != 16.
func AddrFrom16Slice(addr []byte) Address {
	if len(addr) != 16 {
		panic(fmt.Sprintf("bad address length for address %v", addr))
	}
	var fixed [16]byte
	copy(fixed[:], addr)
	return AddrFrom16(fixed)
}

// AddrFromSlice builds an Address from addr. It returns the Address zero value
// if len(addr) is neither 4 nor 16.
func AddrFromSlice(addr []byte) Address {
	n := len(addr)
	if n == ipv4AddressSize {
		return AddrFrom4Slice(addr)
	}
	if n == ipv6AddressSize {
		return AddrFrom16Slice(addr)
	}
	return Address{}
}

// As4 returns a as a 4 byte array. It panics if the address length is not 4.
220 func (a Address) As4() [4]byte { 221 if a.Len() != 4 { 222 panic(fmt.Sprintf("bad address length for address %v", a.addr)) 223 } 224 return [4]byte(a.addr[:4]) 225 } 226 227 // As16 returns a as a 16 byte array. It panics if the address length is not 16. 228 func (a Address) As16() [16]byte { 229 if a.Len() != 16 { 230 panic(fmt.Sprintf("bad address length for address %v", a.addr)) 231 } 232 return [16]byte(a.addr[:16]) 233 } 234 235 // AsSlice returns a as a byte slice. Callers should be careful as it can 236 // return a window into existing memory. 237 // 238 // +checkescape 239 func (a *Address) AsSlice() []byte { 240 return a.addr[:a.length] 241 } 242 243 // BitLen returns the length in bits of a. 244 func (a Address) BitLen() int { 245 return a.Len() * 8 246 } 247 248 // Len returns the length in bytes of a. 249 func (a Address) Len() int { 250 return a.length 251 } 252 253 // WithPrefix returns the address with a prefix that represents a point subnet. 254 func (a Address) WithPrefix() AddressWithPrefix { 255 return AddressWithPrefix{ 256 Address: a, 257 PrefixLen: a.BitLen(), 258 } 259 } 260 261 // Unspecified returns true if the address is unspecified. 262 func (a Address) Unspecified() bool { 263 for _, b := range a.addr { 264 if b != 0 { 265 return false 266 } 267 } 268 return true 269 } 270 271 // Equal returns whether a and other are equal. It exists for use by the cmp 272 // library. 273 func (a Address) Equal(other Address) bool { 274 return a == other 275 } 276 277 // MatchingPrefix returns the matching prefix length in bits. 278 // 279 // Panics if b and a have different lengths. 
280 func (a Address) MatchingPrefix(b Address) uint8 { 281 const bitsInAByte = 8 282 283 if a.Len() != b.Len() { 284 panic(fmt.Sprintf("addresses %s and %s do not have the same length", a, b)) 285 } 286 287 var prefix uint8 288 for i := 0; i < a.length; i++ { 289 aByte := a.addr[i] 290 bByte := b.addr[i] 291 292 if aByte == bByte { 293 prefix += bitsInAByte 294 continue 295 } 296 297 // Count the remaining matching bits in the byte from MSbit to LSBbit. 298 mask := uint8(1) << (bitsInAByte - 1) 299 for { 300 if aByte&mask == bByte&mask { 301 prefix++ 302 mask >>= 1 303 continue 304 } 305 306 break 307 } 308 309 break 310 } 311 312 return prefix 313 } 314 315 // AddressMask is a bitmask for an address. 316 // 317 // +stateify savable 318 type AddressMask struct { 319 mask string 320 } 321 322 // MaskFrom returns a Mask based on str. 323 func MaskFrom(str string) AddressMask { 324 return AddressMask{mask: str} 325 } 326 327 // MaskFromBytes returns a Mask based on bs. 328 func MaskFromBytes(bs []byte) AddressMask { 329 return AddressMask{mask: string(bs)} 330 } 331 332 // String implements Stringer. 333 func (m AddressMask) String() string { 334 return fmt.Sprintf("%x", m.mask) 335 } 336 337 // AsSlice returns a as a byte slice. Callers should be careful as it can 338 // return a window into existing memory. 339 func (m *AddressMask) AsSlice() []byte { 340 return []byte(m.mask) 341 } 342 343 // BitLen returns the length of the mask in bits. 344 func (m AddressMask) BitLen() int { 345 return len(m.mask) * 8 346 } 347 348 // Len returns the length of the mask in bytes. 349 func (m AddressMask) Len() int { 350 return len(m.mask) 351 } 352 353 // Prefix returns the number of bits before the first host bit. 354 func (m AddressMask) Prefix() int { 355 p := 0 356 for _, b := range []byte(m.mask) { 357 p += bits.LeadingZeros8(^b) 358 } 359 return p 360 } 361 362 // Equal returns whether m and other are equal. It exists for use by the cmp 363 // library. 
364 func (m AddressMask) Equal(other AddressMask) bool { 365 return m == other 366 } 367 368 // Subnet is a subnet defined by its address and mask. 369 type Subnet struct { 370 address Address 371 mask AddressMask 372 } 373 374 // NewSubnet creates a new Subnet, checking that the address and mask are the same length. 375 func NewSubnet(a Address, m AddressMask) (Subnet, error) { 376 if a.Len() != m.Len() { 377 return Subnet{}, errSubnetLengthMismatch 378 } 379 for i := 0; i < a.Len(); i++ { 380 if a.addr[i]&^m.mask[i] != 0 { 381 return Subnet{}, errSubnetAddressMasked 382 } 383 } 384 return Subnet{a, m}, nil 385 } 386 387 // String implements Stringer. 388 func (s Subnet) String() string { 389 return fmt.Sprintf("%s/%d", s.ID(), s.Prefix()) 390 } 391 392 // Contains returns true iff the address is of the same length and matches the 393 // subnet address and mask. 394 func (s *Subnet) Contains(a Address) bool { 395 if a.Len() != s.address.Len() { 396 return false 397 } 398 for i := 0; i < a.Len(); i++ { 399 if a.addr[i]&s.mask.mask[i] != s.address.addr[i] { 400 return false 401 } 402 } 403 return true 404 } 405 406 // ID returns the subnet ID. 407 func (s *Subnet) ID() Address { 408 return s.address 409 } 410 411 // Bits returns the number of ones (network bits) and zeros (host bits) in the 412 // subnet mask. 413 func (s *Subnet) Bits() (ones int, zeros int) { 414 ones = s.mask.Prefix() 415 return ones, s.mask.BitLen() - ones 416 } 417 418 // Prefix returns the number of bits before the first host bit. 419 func (s *Subnet) Prefix() int { 420 return s.mask.Prefix() 421 } 422 423 // Mask returns the subnet mask. 424 func (s *Subnet) Mask() AddressMask { 425 return s.mask 426 } 427 428 // Broadcast returns the subnet's broadcast address. 
429 func (s *Subnet) Broadcast() Address { 430 addrCopy := s.address 431 for i := 0; i < addrCopy.Len(); i++ { 432 addrCopy.addr[i] |= ^s.mask.mask[i] 433 } 434 return addrCopy 435 } 436 437 // IsBroadcast returns true if the address is considered a broadcast address. 438 func (s *Subnet) IsBroadcast(address Address) bool { 439 // Only IPv4 supports the notion of a broadcast address. 440 if address.Len() != ipv4AddressSize { 441 return false 442 } 443 444 // Normally, we would just compare address with the subnet's broadcast 445 // address but there is an exception where a simple comparison is not 446 // correct. This exception is for /31 and /32 IPv4 subnets where all 447 // addresses are considered valid host addresses. 448 // 449 // For /31 subnets, the case is easy. RFC 3021 Section 2.1 states that 450 // both addresses in a /31 subnet "MUST be interpreted as host addresses." 451 // 452 // For /32, the case is a bit more vague. RFC 3021 makes no mention of /32 453 // subnets. However, the same reasoning applies - if an exception is not 454 // made, then there do not exist any host addresses in a /32 subnet. RFC 455 // 4632 Section 3.1 also vaguely implies this interpretation by referring 456 // to addresses in /32 subnets as "host routes." 457 return s.Prefix() <= 30 && s.Broadcast() == address 458 } 459 460 // Equal returns true if this Subnet is equal to the given Subnet. 461 func (s Subnet) Equal(o Subnet) bool { 462 // If this changes, update Route.Equal accordingly. 463 return s == o 464 } 465 466 // NICID is a number that uniquely identifies a NIC. 467 type NICID int32 468 469 // ShutdownFlags represents flags that can be passed to the Shutdown() method 470 // of the Endpoint interface. 471 type ShutdownFlags int 472 473 // Values of the flags that can be passed to the Shutdown() method. They can 474 // be OR'ed together. 
const (
	// ShutdownRead is the first flag bit (value 1).
	ShutdownRead ShutdownFlags = 1 << iota
	// ShutdownWrite is the second flag bit (value 2).
	ShutdownWrite
)

// PacketType is used to indicate the destination of the packet.
type PacketType uint8

const (
	// PacketHost indicates a packet addressed to the local host.
	PacketHost PacketType = iota

	// PacketOtherHost indicates an outgoing packet addressed to
	// another host caught by a NIC in promiscuous mode.
	PacketOtherHost

	// PacketOutgoing for a packet originating from the local host
	// that is looped back to a packet socket.
	PacketOutgoing

	// PacketBroadcast indicates a link layer broadcast packet.
	PacketBroadcast

	// PacketMulticast indicates a link layer multicast packet.
	PacketMulticast
)

// FullAddress represents a full transport node address, as required by the
// Connect() and Bind() methods.
//
// +stateify savable
type FullAddress struct {
	// NIC is the ID of the NIC this address refers to.
	//
	// This may not be used by all endpoint types.
	NIC NICID

	// Addr is the network address.
	Addr Address

	// Port is the transport port.
	//
	// This may not be used by all endpoint types.
	Port uint16

	// LinkAddr is the link layer address.
	LinkAddr LinkAddress
}

// Payloader is an interface that provides data.
//
// This interface allows the endpoint to request the amount of data it needs
// based on internal buffers without exposing them.
type Payloader interface {
	io.Reader

	// Len returns the number of bytes of the unread portion of the
	// Reader.
	Len() int
}

// Compile-time checks that the standard library buffer and reader types
// satisfy Payloader.
var _ Payloader = (*bytes.Buffer)(nil)
var _ Payloader = (*bytes.Reader)(nil)

// Compile-time check that *SliceWriter satisfies io.Writer.
var _ io.Writer = (*SliceWriter)(nil)

// SliceWriter implements io.Writer for slices.
type SliceWriter []byte

// Write implements io.Writer.Write.
545 func (s *SliceWriter) Write(b []byte) (int, error) { 546 n := copy(*s, b) 547 *s = (*s)[n:] 548 var err error 549 if n != len(b) { 550 err = io.ErrShortWrite 551 } 552 return n, err 553 } 554 555 var _ io.Writer = (*LimitedWriter)(nil) 556 557 // A LimitedWriter writes to W but limits the amount of data copied to just N 558 // bytes. Each call to Write updates N to reflect the new amount remaining. 559 type LimitedWriter struct { 560 W io.Writer 561 N int64 562 } 563 564 func (l *LimitedWriter) Write(p []byte) (int, error) { 565 pLen := int64(len(p)) 566 if pLen > l.N { 567 p = p[:l.N] 568 } 569 n, err := l.W.Write(p) 570 n64 := int64(n) 571 if err == nil && n64 != pLen { 572 err = io.ErrShortWrite 573 } 574 l.N -= n64 575 return n, err 576 } 577 578 // SendableControlMessages contains socket control messages that can be written. 579 // 580 // +stateify savable 581 type SendableControlMessages struct { 582 // HasTTL indicates whether TTL is valid/set. 583 HasTTL bool 584 585 // TTL is the IPv4 Time To Live of the associated packet. 586 TTL uint8 587 588 // HasHopLimit indicates whether HopLimit is valid/set. 589 HasHopLimit bool 590 591 // HopLimit is the IPv6 Hop Limit of the associated packet. 592 HopLimit uint8 593 594 // HasIPv6PacketInfo indicates whether IPv6PacketInfo is set. 595 HasIPv6PacketInfo bool 596 597 // IPv6PacketInfo holds interface and address data on an incoming packet. 598 IPv6PacketInfo IPv6PacketInfo 599 } 600 601 // ReceivableControlMessages contains socket control messages that can be 602 // received. 603 // 604 // +stateify savable 605 type ReceivableControlMessages struct { 606 // Timestamp is the time that the last packet used to create the read data 607 // was received. 608 Timestamp time.Time `state:".(int64)"` 609 610 // HasInq indicates whether Inq is valid/set. 611 HasInq bool 612 613 // Inq is the number of bytes ready to be received. 614 Inq int32 615 616 // HasTOS indicates whether TOS is valid/set. 
617 HasTOS bool 618 619 // TOS is the IPv4 type of service of the associated packet. 620 TOS uint8 621 622 // HasTTL indicates whether TTL is valid/set. 623 HasTTL bool 624 625 // TTL is the IPv4 Time To Live of the associated packet. 626 TTL uint8 627 628 // HasHopLimit indicates whether HopLimit is valid/set. 629 HasHopLimit bool 630 631 // HopLimit is the IPv6 Hop Limit of the associated packet. 632 HopLimit uint8 633 634 // HasTimestamp indicates whether Timestamp is valid/set. 635 HasTimestamp bool 636 637 // HasTClass indicates whether TClass is valid/set. 638 HasTClass bool 639 640 // TClass is the IPv6 traffic class of the associated packet. 641 TClass uint32 642 643 // HasIPPacketInfo indicates whether PacketInfo is set. 644 HasIPPacketInfo bool 645 646 // PacketInfo holds interface and address data on an incoming packet. 647 PacketInfo IPPacketInfo 648 649 // HasIPv6PacketInfo indicates whether IPv6PacketInfo is set. 650 HasIPv6PacketInfo bool 651 652 // IPv6PacketInfo holds interface and address data on an incoming packet. 653 IPv6PacketInfo IPv6PacketInfo 654 655 // HasOriginalDestinationAddress indicates whether OriginalDstAddress is 656 // set. 657 HasOriginalDstAddress bool 658 659 // OriginalDestinationAddress holds the original destination address 660 // and port of the incoming packet. 661 OriginalDstAddress FullAddress 662 663 // SockErr is the dequeued socket error on recvmsg(MSG_ERRQUEUE). 664 SockErr *SockError 665 } 666 667 // PacketOwner is used to get UID and GID of the packet. 668 type PacketOwner interface { 669 // KUID returns KUID of the packet. 670 KUID() uint32 671 672 // KGID returns KGID of the packet. 673 KGID() uint32 674 } 675 676 // ReadOptions contains options for Endpoint.Read. 677 type ReadOptions struct { 678 // Peek indicates whether this read is a peek. 679 Peek bool 680 681 // NeedRemoteAddr indicates whether to return the remote address, if 682 // supported. 
683 NeedRemoteAddr bool 684 685 // NeedLinkPacketInfo indicates whether to return the link-layer information, 686 // if supported. 687 NeedLinkPacketInfo bool 688 } 689 690 // ReadResult represents result for a successful Endpoint.Read. 691 type ReadResult struct { 692 // Count is the number of bytes received and written to the buffer. 693 Count int 694 695 // Total is the number of bytes of the received packet. This can be used to 696 // determine whether the read is truncated. 697 Total int 698 699 // ControlMessages is the control messages received. 700 ControlMessages ReceivableControlMessages 701 702 // RemoteAddr is the remote address if ReadOptions.NeedAddr is true. 703 RemoteAddr FullAddress 704 705 // LinkPacketInfo is the link-layer information of the received packet if 706 // ReadOptions.NeedLinkPacketInfo is true. 707 LinkPacketInfo LinkPacketInfo 708 } 709 710 // Endpoint is the interface implemented by transport protocols (e.g., tcp, udp) 711 // that exposes functionality like read, write, connect, etc. to users of the 712 // networking stack. 713 type Endpoint interface { 714 // Close puts the endpoint in a closed state and frees all resources 715 // associated with it. Close initiates the teardown process, the 716 // Endpoint may not be fully closed when Close returns. 717 Close() 718 719 // Abort initiates an expedited endpoint teardown. As compared to 720 // Close, Abort prioritizes closing the Endpoint quickly over cleanly. 721 // Abort is best effort; implementing Abort with Close is acceptable. 722 Abort() 723 724 // Read reads data from the endpoint and optionally writes to dst. 725 // 726 // This method does not block if there is no data pending; in this case, 727 // ErrWouldBlock is returned. 728 // 729 // If non-zero number of bytes are successfully read and written to dst, err 730 // must be nil. Otherwise, if dst failed to write anything, ErrBadBuffer 731 // should be returned. 
732 Read(io.Writer, ReadOptions) (ReadResult, Error) 733 734 // Write writes data to the endpoint's peer. This method does not block if 735 // the data cannot be written. 736 // 737 // Unlike io.Writer.Write, Endpoint.Write transfers ownership of any bytes 738 // successfully written to the Endpoint. That is, if a call to 739 // Write(SlicePayload{data}) returns (n, err), it may retain data[:n], and 740 // the caller should not use data[:n] after Write returns. 741 // 742 // Note that unlike io.Writer.Write, it is not an error for Write to 743 // perform a partial write (if n > 0, no error may be returned). Only 744 // stream (TCP) Endpoints may return partial writes, and even then only 745 // in the case where writing additional data would block. Other Endpoints 746 // will either write the entire message or return an error. 747 Write(Payloader, WriteOptions) (int64, Error) 748 749 // Connect connects the endpoint to its peer. Specifying a NIC is 750 // optional. 751 // 752 // There are three classes of return values: 753 // nil -- the attempt to connect succeeded. 754 // ErrConnectStarted/ErrAlreadyConnecting -- the connect attempt started 755 // but hasn't completed yet. In this case, the caller must call Connect 756 // or GetSockOpt(ErrorOption) when the endpoint becomes writable to 757 // get the actual result. The first call to Connect after the socket has 758 // connected returns nil. Calling connect again results in ErrAlreadyConnected. 759 // Anything else -- the attempt to connect failed. 760 // 761 // If address.Addr is empty, this means that Endpoint has to be 762 // disconnected if this is supported, otherwise 763 // ErrAddressFamilyNotSupported must be returned. 764 Connect(address FullAddress) Error 765 766 // Disconnect disconnects the endpoint from its peer. 767 Disconnect() Error 768 769 // Shutdown closes the read and/or write end of the endpoint connection 770 // to its peer. 
771 Shutdown(flags ShutdownFlags) Error 772 773 // Listen puts the endpoint in "listen" mode, which allows it to accept 774 // new connections. 775 Listen(backlog int) Error 776 777 // Accept returns a new endpoint if a peer has established a connection 778 // to an endpoint previously set to listen mode. This method does not 779 // block if no new connections are available. 780 // 781 // The returned Queue is the wait queue for the newly created endpoint. 782 // 783 // If peerAddr is not nil then it is populated with the peer address of the 784 // returned endpoint. 785 Accept(peerAddr *FullAddress) (Endpoint, *waiter.Queue, Error) 786 787 // Bind binds the endpoint to a specific local address and port. 788 // Specifying a NIC is optional. 789 Bind(address FullAddress) Error 790 791 // GetLocalAddress returns the address to which the endpoint is bound. 792 GetLocalAddress() (FullAddress, Error) 793 794 // GetRemoteAddress returns the address to which the endpoint is 795 // connected. 796 GetRemoteAddress() (FullAddress, Error) 797 798 // Readiness returns the current readiness of the endpoint. For example, 799 // if waiter.EventIn is set, the endpoint is immediately readable. 800 Readiness(mask waiter.EventMask) waiter.EventMask 801 802 // SetSockOpt sets a socket option. 803 SetSockOpt(opt SettableSocketOption) Error 804 805 // SetSockOptInt sets a socket option, for simple cases where a value 806 // has the int type. 807 SetSockOptInt(opt SockOptInt, v int) Error 808 809 // GetSockOpt gets a socket option. 810 GetSockOpt(opt GettableSocketOption) Error 811 812 // GetSockOptInt gets a socket option for simple cases where a return 813 // value has the int type. 814 GetSockOptInt(SockOptInt) (int, Error) 815 816 // State returns a socket's lifecycle state. The returned value is 817 // protocol-specific and is primarily used for diagnostics. 818 State() uint32 819 820 // ModerateRecvBuf should be called everytime data is copied to the user 821 // space. 
This allows for dynamic tuning of recv buffer space for a 822 // given socket. 823 // 824 // NOTE: This method is a no-op for sockets other than TCP. 825 ModerateRecvBuf(copied int) 826 827 // Info returns a copy to the transport endpoint info. 828 Info() EndpointInfo 829 830 // Stats returns a reference to the endpoint stats. 831 Stats() EndpointStats 832 833 // SetOwner sets the task owner to the endpoint owner. 834 SetOwner(owner PacketOwner) 835 836 // LastError clears and returns the last error reported by the endpoint. 837 LastError() Error 838 839 // SocketOptions returns the structure which contains all the socket 840 // level options. 841 SocketOptions() *SocketOptions 842 } 843 844 // EndpointWithPreflight is the interface implemented by endpoints that need 845 // to expose the `Preflight` method for preparing the endpoint prior to 846 // calling `Write`. 847 type EndpointWithPreflight interface { 848 // Prepares the endpoint for writes using the provided WriteOptions, 849 // returning an error if the options were incompatible with the endpoint's 850 // current state. 851 Preflight(WriteOptions) Error 852 } 853 854 // LinkPacketInfo holds Link layer information for a received packet. 855 // 856 // +stateify savable 857 type LinkPacketInfo struct { 858 // Protocol is the NetworkProtocolNumber for the packet. 859 Protocol NetworkProtocolNumber 860 861 // PktType is used to indicate the destination of the packet. 862 PktType PacketType 863 } 864 865 // EndpointInfo is the interface implemented by each endpoint info struct. 866 type EndpointInfo interface { 867 // IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo 868 // marker interface. 869 IsEndpointInfo() 870 } 871 872 // EndpointStats is the interface implemented by each endpoint stats struct. 873 type EndpointStats interface { 874 // IsEndpointStats is an empty method to implement the tcpip.EndpointStats 875 // marker interface. 
876 IsEndpointStats() 877 } 878 879 // WriteOptions contains options for Endpoint.Write. 880 type WriteOptions struct { 881 // If To is not nil, write to the given address instead of the endpoint's 882 // peer. 883 To *FullAddress 884 885 // More has the same semantics as Linux's MSG_MORE. 886 More bool 887 888 // EndOfRecord has the same semantics as Linux's MSG_EOR. 889 EndOfRecord bool 890 891 // Atomic means that all data fetched from Payloader must be written to the 892 // endpoint. If Atomic is false, then data fetched from the Payloader may be 893 // discarded if available endpoint buffer space is unsufficient. 894 Atomic bool 895 896 // ControlMessages contains optional overrides used when writing a packet. 897 ControlMessages SendableControlMessages 898 } 899 900 // SockOptInt represents socket options which values have the int type. 901 type SockOptInt int 902 903 const ( 904 // KeepaliveCountOption is used by SetSockOptInt/GetSockOptInt to 905 // specify the number of un-ACKed TCP keepalives that will be sent 906 // before the connection is closed. 907 KeepaliveCountOption SockOptInt = iota 908 909 // IPv4TOSOption is used by SetSockOptInt/GetSockOptInt to specify TOS 910 // for all subsequent outgoing IPv4 packets from the endpoint. 911 IPv4TOSOption 912 913 // IPv6TrafficClassOption is used by SetSockOptInt/GetSockOptInt to 914 // specify TOS for all subsequent outgoing IPv6 packets from the 915 // endpoint. 916 IPv6TrafficClassOption 917 918 // MaxSegOption is used by SetSockOptInt/GetSockOptInt to set/get the 919 // current Maximum Segment Size(MSS) value as specified using the 920 // TCP_MAXSEG option. 921 MaxSegOption 922 923 // MTUDiscoverOption is used to set/get the path MTU discovery setting. 924 // 925 // NOTE: Setting this option to any other value than PMTUDiscoveryDont 926 // is not supported and will fail as such, and getting this option will 927 // always return PMTUDiscoveryDont. 
928 MTUDiscoverOption 929 930 // MulticastTTLOption is used by SetSockOptInt/GetSockOptInt to control 931 // the default TTL value for multicast messages. The default is 1. 932 MulticastTTLOption 933 934 // ReceiveQueueSizeOption is used in GetSockOptInt to specify that the 935 // number of unread bytes in the input buffer should be returned. 936 ReceiveQueueSizeOption 937 938 // SendQueueSizeOption is used in GetSockOptInt to specify that the 939 // number of unread bytes in the output buffer should be returned. 940 SendQueueSizeOption 941 942 // IPv4TTLOption is used by SetSockOptInt/GetSockOptInt to control the default 943 // TTL value for unicast messages. 944 // 945 // The default is configured by DefaultTTLOption. A UseDefaultIPv4TTL value 946 // configures the endpoint to use the default. 947 IPv4TTLOption 948 949 // IPv6HopLimitOption is used by SetSockOptInt/GetSockOptInt to control the 950 // default hop limit value for unicast messages. 951 // 952 // The default is configured by DefaultTTLOption. A UseDefaultIPv6HopLimit 953 // value configures the endpoint to use the default. 954 IPv6HopLimitOption 955 956 // TCPSynCountOption is used by SetSockOptInt/GetSockOptInt to specify 957 // the number of SYN retransmits that TCP should send before aborting 958 // the attempt to connect. It cannot exceed 255. 959 // 960 // NOTE: This option is currently only stubbed out and is no-op. 961 TCPSynCountOption 962 963 // TCPWindowClampOption is used by SetSockOptInt/GetSockOptInt to bound 964 // the size of the advertised window to this value. 965 // 966 // NOTE: This option is currently only stubed out and is a no-op 967 TCPWindowClampOption 968 969 // IPv6Checksum is used to request the stack to populate and validate the IPv6 970 // checksum for transport level headers. 
971 IPv6Checksum 972 ) 973 974 const ( 975 // UseDefaultIPv4TTL is the IPv4TTLOption value that configures an endpoint to 976 // use the default ttl currently configured by the IPv4 protocol (see 977 // DefaultTTLOption). 978 UseDefaultIPv4TTL = 0 979 980 // UseDefaultIPv6HopLimit is the IPv6HopLimitOption value that configures an 981 // endpoint to use the default hop limit currently configured by the IPv6 982 // protocol (see DefaultTTLOption). 983 UseDefaultIPv6HopLimit = -1 984 ) 985 986 const ( 987 // PMTUDiscoveryWant is a setting of the MTUDiscoverOption to use 988 // per-route settings. 989 PMTUDiscoveryWant int = iota 990 991 // PMTUDiscoveryDont is a setting of the MTUDiscoverOption to disable 992 // path MTU discovery. 993 PMTUDiscoveryDont 994 995 // PMTUDiscoveryDo is a setting of the MTUDiscoverOption to always do 996 // path MTU discovery. 997 PMTUDiscoveryDo 998 999 // PMTUDiscoveryProbe is a setting of the MTUDiscoverOption to set DF 1000 // but ignore path MTU. 1001 PMTUDiscoveryProbe 1002 ) 1003 1004 // GettableNetworkProtocolOption is a marker interface for network protocol 1005 // options that may be queried. 1006 type GettableNetworkProtocolOption interface { 1007 isGettableNetworkProtocolOption() 1008 } 1009 1010 // SettableNetworkProtocolOption is a marker interface for network protocol 1011 // options that may be set. 1012 type SettableNetworkProtocolOption interface { 1013 isSettableNetworkProtocolOption() 1014 } 1015 1016 // DefaultTTLOption is used by stack.(*Stack).NetworkProtocolOption to specify 1017 // a default TTL. 1018 type DefaultTTLOption uint8 1019 1020 func (*DefaultTTLOption) isGettableNetworkProtocolOption() {} 1021 1022 func (*DefaultTTLOption) isSettableNetworkProtocolOption() {} 1023 1024 // GettableTransportProtocolOption is a marker interface for transport protocol 1025 // options that may be queried. 
type GettableTransportProtocolOption interface {
	// isGettableTransportProtocolOption is an unexported marker method; it
	// carries no behavior.
	isGettableTransportProtocolOption()
}

// SettableTransportProtocolOption is a marker interface for transport protocol
// options that may be set.
type SettableTransportProtocolOption interface {
	// isSettableTransportProtocolOption is an unexported marker method; it
	// carries no behavior.
	isSettableTransportProtocolOption()
}

// TCPSACKEnabled is the transport protocol option for the SACK option for
// TCP.
//
// See: https://tools.ietf.org/html/rfc2018.
type TCPSACKEnabled bool

func (*TCPSACKEnabled) isGettableTransportProtocolOption() {}

func (*TCPSACKEnabled) isSettableTransportProtocolOption() {}

// TCPRecovery is the loss detection algorithm used by TCP.
//
// Its values are the TCPRACK* bit flags declared below.
type TCPRecovery int32

func (*TCPRecovery) isGettableTransportProtocolOption() {}

func (*TCPRecovery) isSettableTransportProtocolOption() {}

// TCPAlwaysUseSynCookies indicates unconditional usage of syncookies.
type TCPAlwaysUseSynCookies bool

func (*TCPAlwaysUseSynCookies) isGettableTransportProtocolOption() {}

func (*TCPAlwaysUseSynCookies) isSettableTransportProtocolOption() {}

// The TCPRecovery values below are declared with 1 << iota, so each one
// occupies its own bit (1, 2, 4).
const (
	// TCPRACKLossDetection indicates RACK is used for loss detection and
	// recovery.
	TCPRACKLossDetection TCPRecovery = 1 << iota

	// TCPRACKStaticReoWnd indicates the reordering window should not be
	// adjusted when DSACK is received.
	TCPRACKStaticReoWnd

	// TCPRACKNoDupTh indicates RACK should not consider the classic three
	// duplicate acknowledgements rule to mark the segments as lost. This
	// is used when reordering is not detected.
	TCPRACKNoDupTh
)

// TCPDelayEnabled enables/disables Nagle's algorithm in TCP.
type TCPDelayEnabled bool

func (*TCPDelayEnabled) isGettableTransportProtocolOption() {}

func (*TCPDelayEnabled) isSettableTransportProtocolOption() {}

// TCPSendBufferSizeRangeOption is the send buffer size range for TCP.
type TCPSendBufferSizeRangeOption struct {
	Min     int
	Default int
	Max     int
}

func (*TCPSendBufferSizeRangeOption) isGettableTransportProtocolOption() {}

func (*TCPSendBufferSizeRangeOption) isSettableTransportProtocolOption() {}

// TCPReceiveBufferSizeRangeOption is the receive buffer size range for TCP.
type TCPReceiveBufferSizeRangeOption struct {
	Min     int
	Default int
	Max     int
}

func (*TCPReceiveBufferSizeRangeOption) isGettableTransportProtocolOption() {}

func (*TCPReceiveBufferSizeRangeOption) isSettableTransportProtocolOption() {}

// TCPAvailableCongestionControlOption is the supported congestion control
// algorithms for TCP.
type TCPAvailableCongestionControlOption string

func (*TCPAvailableCongestionControlOption) isGettableTransportProtocolOption() {}

func (*TCPAvailableCongestionControlOption) isSettableTransportProtocolOption() {}

// TCPModerateReceiveBufferOption enables/disables receive buffer moderation
// for TCP.
type TCPModerateReceiveBufferOption bool

func (*TCPModerateReceiveBufferOption) isGettableTransportProtocolOption() {}

func (*TCPModerateReceiveBufferOption) isSettableTransportProtocolOption() {}

// GettableSocketOption is a marker interface for socket options that may be
// queried.
type GettableSocketOption interface {
	isGettableSocketOption()
}

// SettableSocketOption is a marker interface for socket options that may be
// configured.
type SettableSocketOption interface {
	isSettableSocketOption()
}

// ICMPv6Filter specifies a filter for ICMPv6 types.
//
// +stateify savable
type ICMPv6Filter struct {
	// DenyType indicates if an ICMP type should be blocked.
	//
	// The ICMPv6 type field is 8 bits so there are up to 256 different ICMPv6
	// types. They are stored as a bitmap of 8 words x 32 bits: type t maps to
	// bit t%32 of word t/32 (see ShouldDeny).
	DenyType [8]uint32
}

// ShouldDeny returns true iff the ICMPv6 Type should be denied, i.e. iff the
// bit for icmpType is set in f.DenyType.
func (f *ICMPv6Filter) ShouldDeny(icmpType uint8) bool {
	const wordBits = 32
	// icmpType is at most 255, so the word index is at most 7 and always
	// within the 8-element array.
	word := f.DenyType[icmpType/wordBits]
	mask := uint32(1) << (icmpType % wordBits)
	return word&mask != 0
}

func (*ICMPv6Filter) isGettableSocketOption() {}

func (*ICMPv6Filter) isSettableSocketOption() {}

// EndpointState represents the state of an endpoint.
type EndpointState uint8

// CongestionControlState indicates the current congestion control state for
// TCP sender.
type CongestionControlState int

const (
	// Open indicates that the sender is receiving acks in order and
	// no loss or dupACK's etc have been detected.
	Open CongestionControlState = iota
	// RTORecovery indicates that an RTO has occurred and the sender
	// has entered an RTO based recovery phase.
	RTORecovery
	// FastRecovery indicates that the sender has entered FastRecovery
	// based on receiving nDupAck's. This state is entered only when
	// SACK is not in use.
	FastRecovery
	// SACKRecovery indicates that the sender has entered SACK based
	// recovery.
	SACKRecovery
	// Disorder indicates the sender either received some SACK blocks
	// or dupACK's.
	Disorder
)

// TCPInfoOption is used by GetSockOpt to expose TCP statistics. It is a
// gettable-only socket option.
//
// TODO(b/64800844): Add and populate stat fields.
type TCPInfoOption struct {
	// RTT is the smoothed round trip time.
	RTT time.Duration

	// RTTVar is the round trip time variation.
	RTTVar time.Duration

	// RTO is the retransmission timeout for the endpoint.
	RTO time.Duration

	// State is the current endpoint protocol state.
	State EndpointState

	// CcState is the congestion control state.
	CcState CongestionControlState

	// SndCwnd is the congestion window, in packets.
	SndCwnd uint32

	// SndSsthresh is the threshold between slow start and congestion
	// avoidance.
	SndSsthresh uint32

	// ReorderSeen indicates if reordering is seen in the endpoint.
	ReorderSeen bool
}

func (*TCPInfoOption) isGettableSocketOption() {}

// KeepaliveIdleOption is used by SetSockOpt/GetSockOpt to specify the time a
// connection must remain idle before the first TCP keepalive packet is sent.
// Once this time is reached, KeepaliveIntervalOption is used instead.
type KeepaliveIdleOption time.Duration

func (*KeepaliveIdleOption) isGettableSocketOption() {}

func (*KeepaliveIdleOption) isSettableSocketOption() {}

// KeepaliveIntervalOption is used by SetSockOpt/GetSockOpt to specify the
// interval between sending TCP keepalive packets.
type KeepaliveIntervalOption time.Duration

func (*KeepaliveIntervalOption) isGettableSocketOption() {}

func (*KeepaliveIntervalOption) isSettableSocketOption() {}

// TCPUserTimeoutOption is used by SetSockOpt/GetSockOpt to specify a user
// specified timeout for a given TCP connection.
// See: RFC5482 for details.
type TCPUserTimeoutOption time.Duration

func (*TCPUserTimeoutOption) isGettableSocketOption() {}

func (*TCPUserTimeoutOption) isSettableSocketOption() {}

// CongestionControlOption is used by SetSockOpt/GetSockOpt to set/get
// the current congestion control algorithm.
type CongestionControlOption string

func (*CongestionControlOption) isGettableSocketOption() {}

func (*CongestionControlOption) isSettableSocketOption() {}

func (*CongestionControlOption) isGettableTransportProtocolOption() {}

func (*CongestionControlOption) isSettableTransportProtocolOption() {}

// TCPLingerTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum duration for which a socket lingers in the TCP_FIN_WAIT_2 state
// before being marked closed.
//
// It is both a socket option and a transport protocol option.
type TCPLingerTimeoutOption time.Duration

func (*TCPLingerTimeoutOption) isGettableSocketOption() {}

func (*TCPLingerTimeoutOption) isSettableSocketOption() {}

func (*TCPLingerTimeoutOption) isGettableTransportProtocolOption() {}

func (*TCPLingerTimeoutOption) isSettableTransportProtocolOption() {}

// TCPTimeWaitTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum duration for which a socket lingers in the TIME_WAIT state
// before being marked closed.
//
// It is both a socket option and a transport protocol option.
type TCPTimeWaitTimeoutOption time.Duration

func (*TCPTimeWaitTimeoutOption) isGettableSocketOption() {}

func (*TCPTimeWaitTimeoutOption) isSettableSocketOption() {}

func (*TCPTimeWaitTimeoutOption) isGettableTransportProtocolOption() {}

func (*TCPTimeWaitTimeoutOption) isSettableTransportProtocolOption() {}

// TCPDeferAcceptOption is used by SetSockOpt/GetSockOpt to allow an
// accept to return a completed connection only when there is data to be
// read. This usually means the listening socket will drop the final ACK
// of a handshake, up to the specified timeout, until a segment with data
// arrives.
type TCPDeferAcceptOption time.Duration

func (*TCPDeferAcceptOption) isGettableSocketOption() {}

func (*TCPDeferAcceptOption) isSettableSocketOption() {}

// TCPMinRTOOption is used by SetSockOpt/GetSockOpt to allow overriding
// default MinRTO used by the Stack.
//
// It is both a socket option and a transport protocol option.
type TCPMinRTOOption time.Duration

func (*TCPMinRTOOption) isGettableSocketOption() {}

func (*TCPMinRTOOption) isSettableSocketOption() {}

func (*TCPMinRTOOption) isGettableTransportProtocolOption() {}

func (*TCPMinRTOOption) isSettableTransportProtocolOption() {}

// TCPMaxRTOOption is used by SetSockOpt/GetSockOpt to allow overriding
// default MaxRTO used by the Stack.
//
// It is both a socket option and a transport protocol option.
type TCPMaxRTOOption time.Duration

func (*TCPMaxRTOOption) isGettableSocketOption() {}

func (*TCPMaxRTOOption) isSettableSocketOption() {}

func (*TCPMaxRTOOption) isGettableTransportProtocolOption() {}

func (*TCPMaxRTOOption) isSettableTransportProtocolOption() {}

// TCPMaxRetriesOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum number of retransmits after which we time out the connection.
//
// It is both a socket option and a transport protocol option.
type TCPMaxRetriesOption uint64

func (*TCPMaxRetriesOption) isGettableSocketOption() {}

func (*TCPMaxRetriesOption) isSettableSocketOption() {}

func (*TCPMaxRetriesOption) isGettableTransportProtocolOption() {}

func (*TCPMaxRetriesOption) isSettableTransportProtocolOption() {}

// TCPSynRetriesOption is used by SetSockOpt/GetSockOpt to specify stack-wide
// default for number of times SYN is retransmitted before aborting a connect.
type TCPSynRetriesOption uint8

func (*TCPSynRetriesOption) isGettableSocketOption() {}

func (*TCPSynRetriesOption) isSettableSocketOption() {}

func (*TCPSynRetriesOption) isGettableTransportProtocolOption() {}

func (*TCPSynRetriesOption) isSettableTransportProtocolOption() {}

// MulticastInterfaceOption is used by SetSockOpt/GetSockOpt to specify a
// default interface for multicast.
type MulticastInterfaceOption struct {
	NIC           NICID
	InterfaceAddr Address
}

func (*MulticastInterfaceOption) isGettableSocketOption() {}

func (*MulticastInterfaceOption) isSettableSocketOption() {}

// MembershipOption is used to identify a multicast membership on an interface.
//
// It has no marker methods of its own; AddMembershipOption and
// RemoveMembershipOption are the settable socket options defined on top of it.
type MembershipOption struct {
	NIC           NICID
	InterfaceAddr Address
	MulticastAddr Address
}

// AddMembershipOption identifies a multicast group to join on some interface.
// It is a settable-only socket option.
type AddMembershipOption MembershipOption

func (*AddMembershipOption) isSettableSocketOption() {}

// RemoveMembershipOption identifies a multicast group to leave on some
// interface. It is a settable-only socket option.
type RemoveMembershipOption MembershipOption

func (*RemoveMembershipOption) isSettableSocketOption() {}

// SocketDetachFilterOption is used by SetSockOpt to detach a previously attached
// classic BPF filter on a given endpoint. It is a settable-only socket option.
type SocketDetachFilterOption int

func (*SocketDetachFilterOption) isSettableSocketOption() {}

// OriginalDestinationOption is used to get the original destination address
// and port of a redirected packet.
1371 type OriginalDestinationOption FullAddress 1372 1373 func (*OriginalDestinationOption) isGettableSocketOption() {} 1374 1375 // TCPTimeWaitReuseOption is used stack.(*Stack).TransportProtocolOption to 1376 // specify if the stack can reuse the port bound by an endpoint in TIME-WAIT for 1377 // new connections when it is safe from protocol viewpoint. 1378 type TCPTimeWaitReuseOption uint8 1379 1380 func (*TCPTimeWaitReuseOption) isGettableSocketOption() {} 1381 1382 func (*TCPTimeWaitReuseOption) isSettableSocketOption() {} 1383 1384 func (*TCPTimeWaitReuseOption) isGettableTransportProtocolOption() {} 1385 1386 func (*TCPTimeWaitReuseOption) isSettableTransportProtocolOption() {} 1387 1388 const ( 1389 // TCPTimeWaitReuseDisabled indicates reuse of port bound by endponts in TIME-WAIT cannot 1390 // be reused for new connections. 1391 TCPTimeWaitReuseDisabled TCPTimeWaitReuseOption = iota 1392 1393 // TCPTimeWaitReuseGlobal indicates reuse of port bound by endponts in TIME-WAIT can 1394 // be reused for new connections irrespective of the src/dest addresses. 1395 TCPTimeWaitReuseGlobal 1396 1397 // TCPTimeWaitReuseLoopbackOnly indicates reuse of port bound by endpoint in TIME-WAIT can 1398 // only be reused if the connection was a connection over loopback. i.e src/dest adddresses 1399 // are loopback addresses. 1400 TCPTimeWaitReuseLoopbackOnly 1401 ) 1402 1403 // LingerOption is used by SetSockOpt/GetSockOpt to set/get the 1404 // duration for which a socket lingers before returning from Close. 1405 // 1406 // +marshal 1407 // +stateify savable 1408 type LingerOption struct { 1409 Enabled bool 1410 Timeout time.Duration 1411 } 1412 1413 // IPPacketInfo is the message structure for IP_PKTINFO. 1414 // 1415 // +stateify savable 1416 type IPPacketInfo struct { 1417 // NIC is the ID of the NIC to be used. 1418 NIC NICID 1419 1420 // LocalAddr is the local address. 
1421 LocalAddr Address 1422 1423 // DestinationAddr is the destination address found in the IP header. 1424 DestinationAddr Address 1425 } 1426 1427 // IPv6PacketInfo is the message structure for IPV6_PKTINFO. 1428 // 1429 // +stateify savable 1430 type IPv6PacketInfo struct { 1431 Addr Address 1432 NIC NICID 1433 } 1434 1435 // SendBufferSizeOption is used by stack.(Stack*).Option/SetOption to 1436 // get/set the default, min and max send buffer sizes. 1437 type SendBufferSizeOption struct { 1438 // Min is the minimum size for send buffer. 1439 Min int 1440 1441 // Default is the default size for send buffer. 1442 Default int 1443 1444 // Max is the maximum size for send buffer. 1445 Max int 1446 } 1447 1448 // ReceiveBufferSizeOption is used by stack.(Stack*).Option/SetOption to 1449 // get/set the default, min and max receive buffer sizes. 1450 type ReceiveBufferSizeOption struct { 1451 // Min is the minimum size for send buffer. 1452 Min int 1453 1454 // Default is the default size for send buffer. 1455 Default int 1456 1457 // Max is the maximum size for send buffer. 1458 Max int 1459 } 1460 1461 // GetSendBufferLimits is used to get the send buffer size limits. 1462 type GetSendBufferLimits func(StackHandler) SendBufferSizeOption 1463 1464 // GetStackSendBufferLimits is used to get default, min and max send buffer size. 1465 func GetStackSendBufferLimits(so StackHandler) SendBufferSizeOption { 1466 var ss SendBufferSizeOption 1467 if err := so.Option(&ss); err != nil { 1468 panic(fmt.Sprintf("s.Option(%#v) = %s", ss, err)) 1469 } 1470 return ss 1471 } 1472 1473 // GetReceiveBufferLimits is used to get the send buffer size limits. 1474 type GetReceiveBufferLimits func(StackHandler) ReceiveBufferSizeOption 1475 1476 // GetStackReceiveBufferLimits is used to get default, min and max send buffer size. 
1477 func GetStackReceiveBufferLimits(so StackHandler) ReceiveBufferSizeOption { 1478 var ss ReceiveBufferSizeOption 1479 if err := so.Option(&ss); err != nil { 1480 panic(fmt.Sprintf("s.Option(%#v) = %s", ss, err)) 1481 } 1482 return ss 1483 } 1484 1485 // Route is a row in the routing table. It specifies through which NIC (and 1486 // gateway) sets of packets should be routed. A row is considered viable if the 1487 // masked target address matches the destination address in the row. 1488 type Route struct { 1489 // Destination must contain the target address for this row to be viable. 1490 Destination Subnet 1491 1492 // Gateway is the gateway to be used if this row is viable. 1493 Gateway Address 1494 1495 // NIC is the id of the nic to be used if this row is viable. 1496 NIC NICID 1497 } 1498 1499 // String implements the fmt.Stringer interface. 1500 func (r Route) String() string { 1501 var out strings.Builder 1502 _, _ = fmt.Fprintf(&out, "%s", r.Destination) 1503 if r.Gateway.length > 0 { 1504 _, _ = fmt.Fprintf(&out, " via %s", r.Gateway) 1505 } 1506 _, _ = fmt.Fprintf(&out, " nic %d", r.NIC) 1507 return out.String() 1508 } 1509 1510 // Equal returns true if the given Route is equal to this Route. 1511 func (r Route) Equal(to Route) bool { 1512 // NOTE: This relies on the fact that r.Destination == to.Destination 1513 return r.Destination.Equal(to.Destination) && r.Gateway == to.Gateway && r.NIC == to.NIC 1514 } 1515 1516 // TransportProtocolNumber is the number of a transport protocol. 1517 type TransportProtocolNumber uint32 1518 1519 // NetworkProtocolNumber is the EtherType of a network protocol in an Ethernet 1520 // frame. 1521 // 1522 // See: https://www.iana.org/assignments/ieee-802-numbers/ieee-802-numbers.xhtml 1523 type NetworkProtocolNumber uint32 1524 1525 // A StatCounter keeps track of a statistic. 
1526 // 1527 // +stateify savable 1528 type StatCounter struct { 1529 count atomicbitops.Uint64 1530 } 1531 1532 // Increment adds one to the counter. 1533 func (s *StatCounter) Increment() { 1534 s.IncrementBy(1) 1535 } 1536 1537 // Decrement minuses one to the counter. 1538 func (s *StatCounter) Decrement() { 1539 s.IncrementBy(^uint64(0)) 1540 } 1541 1542 // Value returns the current value of the counter. 1543 func (s *StatCounter) Value() uint64 { 1544 return s.count.Load() 1545 } 1546 1547 // IncrementBy increments the counter by v. 1548 func (s *StatCounter) IncrementBy(v uint64) { 1549 s.count.Add(v) 1550 } 1551 1552 func (s *StatCounter) String() string { 1553 return strconv.FormatUint(s.Value(), 10) 1554 } 1555 1556 // A MultiCounterStat keeps track of two counters at once. 1557 type MultiCounterStat struct { 1558 a *StatCounter 1559 b *StatCounter 1560 } 1561 1562 // Init sets both internal counters to point to a and b. 1563 func (m *MultiCounterStat) Init(a, b *StatCounter) { 1564 m.a = a 1565 m.b = b 1566 } 1567 1568 // Increment adds one to the counters. 1569 func (m *MultiCounterStat) Increment() { 1570 m.a.Increment() 1571 m.b.Increment() 1572 } 1573 1574 // IncrementBy increments the counters by v. 1575 func (m *MultiCounterStat) IncrementBy(v uint64) { 1576 m.a.IncrementBy(v) 1577 m.b.IncrementBy(v) 1578 } 1579 1580 // ICMPv4PacketStats enumerates counts for all ICMPv4 packet types. 1581 type ICMPv4PacketStats struct { 1582 // LINT.IfChange(ICMPv4PacketStats) 1583 1584 // EchoRequest is the number of ICMPv4 echo packets counted. 1585 EchoRequest *StatCounter 1586 1587 // EchoReply is the number of ICMPv4 echo reply packets counted. 1588 EchoReply *StatCounter 1589 1590 // DstUnreachable is the number of ICMPv4 destination unreachable packets 1591 // counted. 1592 DstUnreachable *StatCounter 1593 1594 // SrcQuench is the number of ICMPv4 source quench packets counted. 
1595 SrcQuench *StatCounter 1596 1597 // Redirect is the number of ICMPv4 redirect packets counted. 1598 Redirect *StatCounter 1599 1600 // TimeExceeded is the number of ICMPv4 time exceeded packets counted. 1601 TimeExceeded *StatCounter 1602 1603 // ParamProblem is the number of ICMPv4 parameter problem packets counted. 1604 ParamProblem *StatCounter 1605 1606 // Timestamp is the number of ICMPv4 timestamp packets counted. 1607 Timestamp *StatCounter 1608 1609 // TimestampReply is the number of ICMPv4 timestamp reply packets counted. 1610 TimestampReply *StatCounter 1611 1612 // InfoRequest is the number of ICMPv4 information request packets counted. 1613 InfoRequest *StatCounter 1614 1615 // InfoReply is the number of ICMPv4 information reply packets counted. 1616 InfoReply *StatCounter 1617 1618 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4PacketStats) 1619 } 1620 1621 // ICMPv4SentPacketStats collects outbound ICMPv4-specific stats. 1622 type ICMPv4SentPacketStats struct { 1623 // LINT.IfChange(ICMPv4SentPacketStats) 1624 1625 ICMPv4PacketStats 1626 1627 // Dropped is the number of ICMPv4 packets dropped due to link layer errors. 1628 Dropped *StatCounter 1629 1630 // RateLimited is the number of ICMPv4 packets dropped due to rate limit being 1631 // exceeded. 1632 RateLimited *StatCounter 1633 1634 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4SentPacketStats) 1635 } 1636 1637 // ICMPv4ReceivedPacketStats collects inbound ICMPv4-specific stats. 1638 type ICMPv4ReceivedPacketStats struct { 1639 // LINT.IfChange(ICMPv4ReceivedPacketStats) 1640 1641 ICMPv4PacketStats 1642 1643 // Invalid is the number of invalid ICMPv4 packets received. 1644 Invalid *StatCounter 1645 1646 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4ReceivedPacketStats) 1647 } 1648 1649 // ICMPv4Stats collects ICMPv4-specific stats. 
type ICMPv4Stats struct {
	// LINT.IfChange(ICMPv4Stats)

	// PacketsSent contains statistics about sent packets.
	PacketsSent ICMPv4SentPacketStats

	// PacketsReceived contains statistics about received packets.
	PacketsReceived ICMPv4ReceivedPacketStats

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4Stats)
}

// ICMPv6PacketStats enumerates counts for all ICMPv6 packet types.
//
// It is embedded by both ICMPv6SentPacketStats and ICMPv6ReceivedPacketStats.
type ICMPv6PacketStats struct {
	// LINT.IfChange(ICMPv6PacketStats)

	// EchoRequest is the number of ICMPv6 echo request packets counted.
	EchoRequest *StatCounter

	// EchoReply is the number of ICMPv6 echo reply packets counted.
	EchoReply *StatCounter

	// DstUnreachable is the number of ICMPv6 destination unreachable packets
	// counted.
	DstUnreachable *StatCounter

	// PacketTooBig is the number of ICMPv6 packet too big packets counted.
	PacketTooBig *StatCounter

	// TimeExceeded is the number of ICMPv6 time exceeded packets counted.
	TimeExceeded *StatCounter

	// ParamProblem is the number of ICMPv6 parameter problem packets counted.
	ParamProblem *StatCounter

	// RouterSolicit is the number of ICMPv6 router solicit packets counted.
	RouterSolicit *StatCounter

	// RouterAdvert is the number of ICMPv6 router advert packets counted.
	RouterAdvert *StatCounter

	// NeighborSolicit is the number of ICMPv6 neighbor solicit packets counted.
	NeighborSolicit *StatCounter

	// NeighborAdvert is the number of ICMPv6 neighbor advert packets counted.
	NeighborAdvert *StatCounter

	// RedirectMsg is the number of ICMPv6 redirect message packets counted.
	RedirectMsg *StatCounter

	// MulticastListenerQuery is the number of Multicast Listener Query messages
	// counted.
	MulticastListenerQuery *StatCounter

	// MulticastListenerReport is the number of Multicast Listener Report messages
	// counted.
	MulticastListenerReport *StatCounter

	// MulticastListenerReportV2 is the number of Multicast Listener Report
	// messages counted.
	MulticastListenerReportV2 *StatCounter

	// MulticastListenerDone is the number of Multicast Listener Done messages
	// counted.
	MulticastListenerDone *StatCounter

	// LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6PacketStats)
}

// ICMPv6SentPacketStats collects outbound ICMPv6-specific stats.
type ICMPv6SentPacketStats struct {
	// LINT.IfChange(ICMPv6SentPacketStats)

	ICMPv6PacketStats

	// Dropped is the number of ICMPv6 packets dropped due to link layer errors.
	Dropped *StatCounter

	// RateLimited is the number of ICMPv6 packets dropped due to rate limit being
	// exceeded.
	RateLimited *StatCounter

	// LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6SentPacketStats)
}

// ICMPv6ReceivedPacketStats collects inbound ICMPv6-specific stats.
type ICMPv6ReceivedPacketStats struct {
	// LINT.IfChange(ICMPv6ReceivedPacketStats)

	ICMPv6PacketStats

	// Unrecognized is the number of ICMPv6 packets received that the transport
	// layer does not know how to parse.
	Unrecognized *StatCounter

	// Invalid is the number of invalid ICMPv6 packets received.
	Invalid *StatCounter

	// RouterOnlyPacketsDroppedByHost is the number of ICMPv6 packets dropped due
	// to being router-specific packets.
	RouterOnlyPacketsDroppedByHost *StatCounter

	// LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6ReceivedPacketStats)
}

// ICMPv6Stats collects ICMPv6-specific stats.
type ICMPv6Stats struct {
	// LINT.IfChange(ICMPv6Stats)

	// PacketsSent contains statistics about sent packets.
	PacketsSent ICMPv6SentPacketStats

	// PacketsReceived contains statistics about received packets.
	PacketsReceived ICMPv6ReceivedPacketStats

	// LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6Stats)
}

// ICMPStats collects ICMP-specific stats (both v4 and v6).
type ICMPStats struct {
	// V4 contains the ICMPv4-specific stats.
	V4 ICMPv4Stats

	// V6 contains the ICMPv6-specific stats.
	V6 ICMPv6Stats
}

// IGMPPacketStats enumerates counts for all IGMP packet types.
//
// It is embedded by IGMPSentPacketStats.
type IGMPPacketStats struct {
	// LINT.IfChange(IGMPPacketStats)

	// MembershipQuery is the number of Membership Query messages counted.
	MembershipQuery *StatCounter

	// V1MembershipReport is the number of Version 1 Membership Report messages
	// counted.
	V1MembershipReport *StatCounter

	// V2MembershipReport is the number of Version 2 Membership Report messages
	// counted.
	V2MembershipReport *StatCounter

	// V3MembershipReport is the number of Version 3 Membership Report messages
	// counted.
	V3MembershipReport *StatCounter

	// LeaveGroup is the number of Leave Group messages counted.
	LeaveGroup *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPPacketStats)
}

// IGMPSentPacketStats collects outbound IGMP-specific stats.
type IGMPSentPacketStats struct {
	// LINT.IfChange(IGMPSentPacketStats)

	IGMPPacketStats

	// Dropped is the number of IGMP packets dropped.
	Dropped *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPSentPacketStats)
}

// IGMPReceivedPacketStats collects inbound IGMP-specific stats.
type IGMPReceivedPacketStats struct {
	// LINT.IfChange(IGMPReceivedPacketStats)

	IGMPPacketStats

	// Invalid is the number of invalid IGMP packets received.
	Invalid *StatCounter

	// ChecksumErrors is the number of IGMP packets dropped due to bad checksums.
	ChecksumErrors *StatCounter

	// Unrecognized is the number of unrecognized messages counted, these are
	// silently ignored for forward-compatibility.
	Unrecognized *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPReceivedPacketStats)
}

// IGMPStats collects IGMP-specific stats.
type IGMPStats struct {
	// LINT.IfChange(IGMPStats)

	// PacketsSent contains statistics about sent packets.
	PacketsSent IGMPSentPacketStats

	// PacketsReceived contains statistics about received packets.
	PacketsReceived IGMPReceivedPacketStats

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPStats)
}

// IPForwardingStats collects stats related to IP forwarding (both v4 and v6).
type IPForwardingStats struct {
	// LINT.IfChange(IPForwardingStats)

	// Unrouteable is the number of IP packets received which were dropped
	// because a route to their destination could not be constructed.
	Unrouteable *StatCounter

	// ExhaustedTTL is the number of IP packets received which were dropped
	// because their TTL was exhausted.
	ExhaustedTTL *StatCounter

	// InitializingSource is the number of IP packets which were dropped
	// because they contained a source address that may only be used on the local
	// network as part of initialization work.
	InitializingSource *StatCounter

	// LinkLocalSource is the number of IP packets which were dropped
	// because they contained a link-local source address.
	LinkLocalSource *StatCounter

	// LinkLocalDestination is the number of IP packets which were dropped
	// because they contained a link-local destination address.
	LinkLocalDestination *StatCounter

	// PacketTooBig is the number of IP packets which were dropped because they
	// were too big for the outgoing MTU.
	PacketTooBig *StatCounter

	// HostUnreachable is the number of IP packets received which could not be
	// successfully forwarded due to an unresolvable next hop.
	HostUnreachable *StatCounter

	// ExtensionHeaderProblem is the number of IP packets which were dropped
	// because of a problem encountered when processing an IPv6 extension
	// header.
	ExtensionHeaderProblem *StatCounter

	// UnexpectedMulticastInputInterface is the number of multicast packets that
	// were received on an interface that did not match the corresponding route's
	// expected input interface.
	UnexpectedMulticastInputInterface *StatCounter

	// UnknownOutputEndpoint is the number of packets that could not be forwarded
	// because the output endpoint could not be found.
	UnknownOutputEndpoint *StatCounter

	// NoMulticastPendingQueueBufferSpace is the number of multicast packets that
	// were dropped due to insufficient buffer space in the pending packet queue.
	NoMulticastPendingQueueBufferSpace *StatCounter

	// OutgoingDeviceNoBufferSpace is the number of packets that were dropped due
	// to insufficient space in the outgoing device.
	OutgoingDeviceNoBufferSpace *StatCounter

	// Errors is the number of IP packets received which could not be
	// successfully forwarded.
	Errors *StatCounter

	// LINT.ThenChange(network/internal/ip/stats.go:MultiCounterIPForwardingStats)
}

// IPStats collects IP-specific stats (both v4 and v6).
1909 type IPStats struct { 1910 // LINT.IfChange(IPStats) 1911 1912 // PacketsReceived is the number of IP packets received from the link layer. 1913 PacketsReceived *StatCounter 1914 1915 // ValidPacketsReceived is the number of valid IP packets that reached the IP 1916 // layer. 1917 ValidPacketsReceived *StatCounter 1918 1919 // DisabledPacketsReceived is the number of IP packets received from the link 1920 // layer when the IP layer is disabled. 1921 DisabledPacketsReceived *StatCounter 1922 1923 // InvalidDestinationAddressesReceived is the number of IP packets received 1924 // with an unknown or invalid destination address. 1925 InvalidDestinationAddressesReceived *StatCounter 1926 1927 // InvalidSourceAddressesReceived is the number of IP packets received with a 1928 // source address that should never have been received on the wire. 1929 InvalidSourceAddressesReceived *StatCounter 1930 1931 // PacketsDelivered is the number of incoming IP packets that are successfully 1932 // delivered to the transport layer. 1933 PacketsDelivered *StatCounter 1934 1935 // PacketsSent is the number of IP packets sent via WritePacket. 1936 PacketsSent *StatCounter 1937 1938 // OutgoingPacketErrors is the number of IP packets which failed to write to a 1939 // link-layer endpoint. 1940 OutgoingPacketErrors *StatCounter 1941 1942 // MalformedPacketsReceived is the number of IP Packets that were dropped due 1943 // to the IP packet header failing validation checks. 1944 MalformedPacketsReceived *StatCounter 1945 1946 // MalformedFragmentsReceived is the number of IP Fragments that were dropped 1947 // due to the fragment failing validation checks. 1948 MalformedFragmentsReceived *StatCounter 1949 1950 // IPTablesPreroutingDropped is the number of IP packets dropped in the 1951 // Prerouting chain. 1952 IPTablesPreroutingDropped *StatCounter 1953 1954 // IPTablesInputDropped is the number of IP packets dropped in the Input 1955 // chain. 
1956 IPTablesInputDropped *StatCounter 1957 1958 // IPTablesForwardDropped is the number of IP packets dropped in the Forward 1959 // chain. 1960 IPTablesForwardDropped *StatCounter 1961 1962 // IPTablesOutputDropped is the number of IP packets dropped in the Output 1963 // chain. 1964 IPTablesOutputDropped *StatCounter 1965 1966 // IPTablesPostroutingDropped is the number of IP packets dropped in the 1967 // Postrouting chain. 1968 IPTablesPostroutingDropped *StatCounter 1969 1970 // TODO(https://gvisor.dev/issues/5529): Move the IPv4-only option stats out 1971 // of IPStats. 1972 // OptionTimestampReceived is the number of Timestamp options seen. 1973 OptionTimestampReceived *StatCounter 1974 1975 // OptionRecordRouteReceived is the number of Record Route options seen. 1976 OptionRecordRouteReceived *StatCounter 1977 1978 // OptionRouterAlertReceived is the number of Router Alert options seen. 1979 OptionRouterAlertReceived *StatCounter 1980 1981 // OptionUnknownReceived is the number of unknown IP options seen. 1982 OptionUnknownReceived *StatCounter 1983 1984 // Forwarding collects stats related to IP forwarding. 1985 Forwarding IPForwardingStats 1986 1987 // LINT.ThenChange(network/internal/ip/stats.go:MultiCounterIPStats) 1988 } 1989 1990 // ARPStats collects ARP-specific stats. 1991 type ARPStats struct { 1992 // LINT.IfChange(ARPStats) 1993 1994 // PacketsReceived is the number of ARP packets received from the link layer. 1995 PacketsReceived *StatCounter 1996 1997 // DisabledPacketsReceived is the number of ARP packets received from the link 1998 // layer when the ARP layer is disabled. 1999 DisabledPacketsReceived *StatCounter 2000 2001 // MalformedPacketsReceived is the number of ARP packets that were dropped due 2002 // to being malformed. 2003 MalformedPacketsReceived *StatCounter 2004 2005 // RequestsReceived is the number of ARP requests received. 
2006 RequestsReceived *StatCounter 2007 2008 // RequestsReceivedUnknownTargetAddress is the number of ARP requests that 2009 // were targeted to an interface different from the one it was received on. 2010 RequestsReceivedUnknownTargetAddress *StatCounter 2011 2012 // OutgoingRequestInterfaceHasNoLocalAddressErrors is the number of failures 2013 // to send an ARP request because the interface has no network address 2014 // assigned to it. 2015 OutgoingRequestInterfaceHasNoLocalAddressErrors *StatCounter 2016 2017 // OutgoingRequestBadLocalAddressErrors is the number of failures to send an 2018 // ARP request with a bad local address. 2019 OutgoingRequestBadLocalAddressErrors *StatCounter 2020 2021 // OutgoingRequestsDropped is the number of ARP requests which failed to write 2022 // to a link-layer endpoint. 2023 OutgoingRequestsDropped *StatCounter 2024 2025 // OutgoingRequestSent is the number of ARP requests successfully written to a 2026 // link-layer endpoint. 2027 OutgoingRequestsSent *StatCounter 2028 2029 // RepliesReceived is the number of ARP replies received. 2030 RepliesReceived *StatCounter 2031 2032 // OutgoingRepliesDropped is the number of ARP replies which failed to write 2033 // to a link-layer endpoint. 2034 OutgoingRepliesDropped *StatCounter 2035 2036 // OutgoingRepliesSent is the number of ARP replies successfully written to a 2037 // link-layer endpoint. 2038 OutgoingRepliesSent *StatCounter 2039 2040 // LINT.ThenChange(network/arp/stats.go:multiCounterARPStats) 2041 } 2042 2043 // TCPStats collects TCP-specific stats. 2044 type TCPStats struct { 2045 // ActiveConnectionOpenings is the number of connections opened 2046 // successfully via Connect. 2047 ActiveConnectionOpenings *StatCounter 2048 2049 // PassiveConnectionOpenings is the number of connections opened 2050 // successfully via Listen. 
2051 PassiveConnectionOpenings *StatCounter 2052 2053 // CurrentEstablished is the number of TCP connections for which the 2054 // current state is ESTABLISHED. 2055 CurrentEstablished *StatCounter 2056 2057 // CurrentConnected is the number of TCP connections that 2058 // are in connected state. 2059 CurrentConnected *StatCounter 2060 2061 // EstablishedResets is the number of times TCP connections have made 2062 // a direct transition to the CLOSED state from either the 2063 // ESTABLISHED state or the CLOSE-WAIT state. 2064 EstablishedResets *StatCounter 2065 2066 // EstablishedClosed is the number of times established TCP connections 2067 // made a transition to CLOSED state. 2068 EstablishedClosed *StatCounter 2069 2070 // EstablishedTimedout is the number of times an established connection 2071 // was reset because of keep-alive time out. 2072 EstablishedTimedout *StatCounter 2073 2074 // ListenOverflowSynDrop is the number of times the listen queue overflowed 2075 // and a SYN was dropped. 2076 ListenOverflowSynDrop *StatCounter 2077 2078 // ListenOverflowAckDrop is the number of times the final ACK 2079 // in the handshake was dropped due to overflow. 2080 ListenOverflowAckDrop *StatCounter 2081 2082 // ListenOverflowCookieSent is the number of times a SYN cookie was sent. 2083 ListenOverflowSynCookieSent *StatCounter 2084 2085 // ListenOverflowSynCookieRcvd is the number of times a valid SYN 2086 // cookie was received. 2087 ListenOverflowSynCookieRcvd *StatCounter 2088 2089 // ListenOverflowInvalidSynCookieRcvd is the number of times an invalid SYN cookie 2090 // was received. 2091 ListenOverflowInvalidSynCookieRcvd *StatCounter 2092 2093 // FailedConnectionAttempts is the number of calls to Connect or Listen 2094 // (active and passive openings, respectively) that end in an error. 2095 FailedConnectionAttempts *StatCounter 2096 2097 // ValidSegmentsReceived is the number of TCP segments received that 2098 // the transport layer successfully parsed. 
2099 ValidSegmentsReceived *StatCounter 2100 2101 // InvalidSegmentsReceived is the number of TCP segments received that 2102 // the transport layer could not parse. 2103 InvalidSegmentsReceived *StatCounter 2104 2105 // SegmentsSent is the number of TCP segments sent. 2106 SegmentsSent *StatCounter 2107 2108 // SegmentSendErrors is the number of TCP segments failed to be sent. 2109 SegmentSendErrors *StatCounter 2110 2111 // ResetsSent is the number of TCP resets sent. 2112 ResetsSent *StatCounter 2113 2114 // ResetsReceived is the number of TCP resets received. 2115 ResetsReceived *StatCounter 2116 2117 // Retransmits is the number of TCP segments retransmitted. 2118 Retransmits *StatCounter 2119 2120 // FastRecovery is the number of times Fast Recovery was used to 2121 // recover from packet loss. 2122 FastRecovery *StatCounter 2123 2124 // SACKRecovery is the number of times SACK Recovery was used to 2125 // recover from packet loss. 2126 SACKRecovery *StatCounter 2127 2128 // TLPRecovery is the number of times recovery was accomplished by the tail 2129 // loss probe. 2130 TLPRecovery *StatCounter 2131 2132 // SlowStartRetransmits is the number of segments retransmitted in slow 2133 // start. 2134 SlowStartRetransmits *StatCounter 2135 2136 // FastRetransmit is the number of segments retransmitted in fast 2137 // recovery. 2138 FastRetransmit *StatCounter 2139 2140 // Timeouts is the number of times the RTO expired. 2141 Timeouts *StatCounter 2142 2143 // ChecksumErrors is the number of segments dropped due to bad checksums. 2144 ChecksumErrors *StatCounter 2145 2146 // FailedPortReservations is the number of times TCP failed to reserve 2147 // a port. 2148 FailedPortReservations *StatCounter 2149 2150 // SegmentsAckedWithDSACK is the number of segments acknowledged with 2151 // DSACK. 2152 SegmentsAckedWithDSACK *StatCounter 2153 2154 // SpuriousRecovery is the number of times the connection entered loss 2155 // recovery spuriously. 
2156 SpuriousRecovery *StatCounter 2157 2158 // SpuriousRTORecovery is the number of spurious RTOs. 2159 SpuriousRTORecovery *StatCounter 2160 2161 // ForwardMaxInFlightDrop is the number of connection requests that are 2162 // dropped due to exceeding the maximum number of in-flight connection 2163 // requests. 2164 ForwardMaxInFlightDrop *StatCounter 2165 } 2166 2167 // UDPStats collects UDP-specific stats. 2168 type UDPStats struct { 2169 // PacketsReceived is the number of UDP datagrams received via 2170 // HandlePacket. 2171 PacketsReceived *StatCounter 2172 2173 // UnknownPortErrors is the number of incoming UDP datagrams dropped 2174 // because they did not have a known destination port. 2175 UnknownPortErrors *StatCounter 2176 2177 // ReceiveBufferErrors is the number of incoming UDP datagrams dropped 2178 // due to the receiving buffer being in an invalid state. 2179 ReceiveBufferErrors *StatCounter 2180 2181 // MalformedPacketsReceived is the number of incoming UDP datagrams 2182 // dropped due to the UDP header being in a malformed state. 2183 MalformedPacketsReceived *StatCounter 2184 2185 // PacketsSent is the number of UDP datagrams sent via sendUDP. 2186 PacketsSent *StatCounter 2187 2188 // PacketSendErrors is the number of datagrams failed to be sent. 2189 PacketSendErrors *StatCounter 2190 2191 // ChecksumErrors is the number of datagrams dropped due to bad checksums. 2192 ChecksumErrors *StatCounter 2193 } 2194 2195 // NICNeighborStats holds metrics for the neighbor table. 2196 type NICNeighborStats struct { 2197 // LINT.IfChange(NICNeighborStats) 2198 2199 // UnreachableEntryLookups counts the number of lookups performed on an 2200 // entry in Unreachable state. 2201 UnreachableEntryLookups *StatCounter 2202 2203 // DroppedConfirmationForNoninitiatedNeighbor counts the number of neighbor 2204 // responses that were dropped because they didn't match an entry in the 2205 // cache. 
2206 DroppedConfirmationForNoninitiatedNeighbor *StatCounter 2207 2208 // DroppedInvalidLinkAddressConfirmations counts the number of neighbor 2209 // responses that were ignored because they had an invalid source link-layer 2210 // address. 2211 DroppedInvalidLinkAddressConfirmations *StatCounter 2212 2213 // LINT.ThenChange(stack/nic_stats.go:multiCounterNICNeighborStats) 2214 } 2215 2216 // NICPacketStats holds basic packet statistics. 2217 type NICPacketStats struct { 2218 // LINT.IfChange(NICPacketStats) 2219 2220 // Packets is the number of packets counted. 2221 Packets *StatCounter 2222 2223 // Bytes is the number of bytes counted. 2224 Bytes *StatCounter 2225 2226 // LINT.ThenChange(stack/nic_stats.go:multiCounterNICPacketStats) 2227 } 2228 2229 // IntegralStatCounterMap holds a map associating integral keys with 2230 // StatCounters. 2231 type IntegralStatCounterMap struct { 2232 mu sync.RWMutex 2233 // +checklocks:mu 2234 counterMap map[uint64]*StatCounter 2235 } 2236 2237 // Keys returns all keys present in the map. 2238 func (m *IntegralStatCounterMap) Keys() []uint64 { 2239 m.mu.RLock() 2240 defer m.mu.RUnlock() 2241 var keys []uint64 2242 for k := range m.counterMap { 2243 keys = append(keys, k) 2244 } 2245 return keys 2246 } 2247 2248 // Get returns the counter mapped by the provided key. 2249 func (m *IntegralStatCounterMap) Get(key uint64) (*StatCounter, bool) { 2250 m.mu.RLock() 2251 defer m.mu.RUnlock() 2252 counter, ok := m.counterMap[key] 2253 return counter, ok 2254 } 2255 2256 // Init initializes the map. 2257 func (m *IntegralStatCounterMap) Init() { 2258 m.mu.Lock() 2259 defer m.mu.Unlock() 2260 m.counterMap = make(map[uint64]*StatCounter) 2261 } 2262 2263 // Increment increments the counter associated with the provided key. 
2264 func (m *IntegralStatCounterMap) Increment(key uint64) { 2265 m.mu.RLock() 2266 counter, ok := m.counterMap[key] 2267 m.mu.RUnlock() 2268 2269 if !ok { 2270 m.mu.Lock() 2271 counter, ok = m.counterMap[key] 2272 if !ok { 2273 counter = new(StatCounter) 2274 m.counterMap[key] = counter 2275 } 2276 m.mu.Unlock() 2277 } 2278 counter.Increment() 2279 } 2280 2281 // A MultiIntegralStatCounterMap keeps track of two integral counter maps at 2282 // once. 2283 type MultiIntegralStatCounterMap struct { 2284 a *IntegralStatCounterMap 2285 b *IntegralStatCounterMap 2286 } 2287 2288 // Init sets the internal integral counter maps to point to a and b. 2289 func (m *MultiIntegralStatCounterMap) Init(a, b *IntegralStatCounterMap) { 2290 m.a = a 2291 m.b = b 2292 } 2293 2294 // Increment increments the counter in each map corresponding to the 2295 // provided key. 2296 func (m *MultiIntegralStatCounterMap) Increment(key uint64) { 2297 m.a.Increment(key) 2298 m.b.Increment(key) 2299 } 2300 2301 // NICStats holds NIC statistics. 2302 type NICStats struct { 2303 // LINT.IfChange(NICStats) 2304 2305 // UnknownL3ProtocolRcvdPacketCounts records the number of packets recieved 2306 // for each unknown or unsupported netowrk protocol number. 2307 UnknownL3ProtocolRcvdPacketCounts *IntegralStatCounterMap 2308 2309 // UnknownL4ProtocolRcvdPacketCounts records the number of packets recieved 2310 // for each unknown or unsupported transport protocol number. 2311 UnknownL4ProtocolRcvdPacketCounts *IntegralStatCounterMap 2312 2313 // MalformedL4RcvdPackets is the number of packets received by a NIC that 2314 // could not be delivered to a transport endpoint because the L4 header could 2315 // not be parsed. 2316 MalformedL4RcvdPackets *StatCounter 2317 2318 // Tx contains statistics about transmitted packets. 2319 Tx NICPacketStats 2320 2321 // TxPacketsDroppedNoBufferSpace is the number of packets dropepd due to the 2322 // NIC not having enough buffer space to send the packet. 
2323 // 2324 // Packets may be dropped with a no buffer space error when the device TX 2325 // queue is full. 2326 TxPacketsDroppedNoBufferSpace *StatCounter 2327 2328 // Rx contains statistics about received packets. 2329 Rx NICPacketStats 2330 2331 // DisabledRx contains statistics about received packets on disabled NICs. 2332 DisabledRx NICPacketStats 2333 2334 // Neighbor contains statistics about neighbor entries. 2335 Neighbor NICNeighborStats 2336 2337 // LINT.ThenChange(stack/nic_stats.go:multiCounterNICStats) 2338 } 2339 2340 // FillIn returns a copy of s with nil fields initialized to new StatCounters. 2341 func (s NICStats) FillIn() NICStats { 2342 InitStatCounters(reflect.ValueOf(&s).Elem()) 2343 return s 2344 } 2345 2346 // Stats holds statistics about the networking stack. 2347 type Stats struct { 2348 // TODO(https://gvisor.dev/issues/5986): Make the DroppedPackets stat less 2349 // ambiguous. 2350 2351 // DroppedPackets is the number of packets dropped at the transport layer. 2352 DroppedPackets *StatCounter 2353 2354 // NICs is an aggregation of every NIC's statistics. These should not be 2355 // incremented using this field, but using the relevant NIC multicounters. 2356 NICs NICStats 2357 2358 // ICMP is an aggregation of every NetworkEndpoint's ICMP statistics (both v4 2359 // and v6). These should not be incremented using this field, but using the 2360 // relevant NetworkEndpoint ICMP multicounters. 2361 ICMP ICMPStats 2362 2363 // IGMP is an aggregation of every NetworkEndpoint's IGMP statistics. These 2364 // should not be incremented using this field, but using the relevant 2365 // NetworkEndpoint IGMP multicounters. 2366 IGMP IGMPStats 2367 2368 // IP is an aggregation of every NetworkEndpoint's IP statistics. These should 2369 // not be incremented using this field, but using the relevant NetworkEndpoint 2370 // IP multicounters. 2371 IP IPStats 2372 2373 // ARP is an aggregation of every NetworkEndpoint's ARP statistics. 
These 2374 // should not be incremented using this field, but using the relevant 2375 // NetworkEndpoint ARP multicounters. 2376 ARP ARPStats 2377 2378 // TCP holds TCP-specific stats. 2379 TCP TCPStats 2380 2381 // UDP holds UDP-specific stats. 2382 UDP UDPStats 2383 } 2384 2385 // ReceiveErrors collects packet receive errors within transport endpoint. 2386 // 2387 // +stateify savable 2388 type ReceiveErrors struct { 2389 // ReceiveBufferOverflow is the number of received packets dropped 2390 // due to the receive buffer being full. 2391 ReceiveBufferOverflow StatCounter 2392 2393 // MalformedPacketsReceived is the number of incoming packets 2394 // dropped due to the packet header being in a malformed state. 2395 MalformedPacketsReceived StatCounter 2396 2397 // ClosedReceiver is the number of received packets dropped because 2398 // of receiving endpoint state being closed. 2399 ClosedReceiver StatCounter 2400 2401 // ChecksumErrors is the number of packets dropped due to bad checksums. 2402 ChecksumErrors StatCounter 2403 } 2404 2405 // SendErrors collects packet send errors within the transport layer for an 2406 // endpoint. 2407 // 2408 // +stateify savable 2409 type SendErrors struct { 2410 // SendToNetworkFailed is the number of packets failed to be written to 2411 // the network endpoint. 2412 SendToNetworkFailed StatCounter 2413 2414 // NoRoute is the number of times we failed to resolve IP route. 2415 NoRoute StatCounter 2416 } 2417 2418 // ReadErrors collects segment read errors from an endpoint read call. 2419 // 2420 // +stateify savable 2421 type ReadErrors struct { 2422 // ReadClosed is the number of received packet drops because the endpoint 2423 // was shutdown for read. 2424 ReadClosed StatCounter 2425 2426 // InvalidEndpointState is the number of times we found the endpoint state 2427 // to be unexpected. 
2428 InvalidEndpointState StatCounter 2429 2430 // NotConnected is the number of times we tried to read but found that the 2431 // endpoint was not connected. 2432 NotConnected StatCounter 2433 } 2434 2435 // WriteErrors collects packet write errors from an endpoint write call. 2436 // 2437 // +stateify savable 2438 type WriteErrors struct { 2439 // WriteClosed is the number of packet drops because the endpoint 2440 // was shutdown for write. 2441 WriteClosed StatCounter 2442 2443 // InvalidEndpointState is the number of times we found the endpoint state 2444 // to be unexpected. 2445 InvalidEndpointState StatCounter 2446 2447 // InvalidArgs is the number of times invalid input arguments were 2448 // provided for endpoint Write call. 2449 InvalidArgs StatCounter 2450 } 2451 2452 // TransportEndpointStats collects statistics about the endpoint. 2453 // 2454 // +stateify savable 2455 type TransportEndpointStats struct { 2456 // PacketsReceived is the number of successful packet receives. 2457 PacketsReceived StatCounter 2458 2459 // PacketsSent is the number of successful packet sends. 2460 PacketsSent StatCounter 2461 2462 // ReceiveErrors collects packet receive errors within transport layer. 2463 ReceiveErrors ReceiveErrors 2464 2465 // ReadErrors collects packet read errors from an endpoint read call. 2466 ReadErrors ReadErrors 2467 2468 // SendErrors collects packet send errors within the transport layer. 2469 SendErrors SendErrors 2470 2471 // WriteErrors collects packet write errors from an endpoint write call. 2472 WriteErrors WriteErrors 2473 } 2474 2475 // IsEndpointStats is an empty method to implement the tcpip.EndpointStats 2476 // marker interface. 2477 func (*TransportEndpointStats) IsEndpointStats() {} 2478 2479 // InitStatCounters initializes v's fields with nil StatCounter fields to new 2480 // StatCounters. 
2481 func InitStatCounters(v reflect.Value) { 2482 for i := 0; i < v.NumField(); i++ { 2483 v := v.Field(i) 2484 if s, ok := v.Addr().Interface().(**StatCounter); ok { 2485 if *s == nil { 2486 *s = new(StatCounter) 2487 } 2488 } else if s, ok := v.Addr().Interface().(**IntegralStatCounterMap); ok { 2489 if *s == nil { 2490 *s = new(IntegralStatCounterMap) 2491 (*s).Init() 2492 } 2493 } else { 2494 InitStatCounters(v) 2495 } 2496 } 2497 } 2498 2499 // FillIn returns a copy of s with nil fields initialized to new StatCounters. 2500 func (s Stats) FillIn() Stats { 2501 InitStatCounters(reflect.ValueOf(&s).Elem()) 2502 return s 2503 } 2504 2505 // Clone clones a copy of the TransportEndpointStats into dst by atomically 2506 // reading each field. 2507 func (src *TransportEndpointStats) Clone(dst *TransportEndpointStats) { 2508 clone(reflect.ValueOf(dst).Elem(), reflect.ValueOf(src).Elem()) 2509 } 2510 2511 func clone(dst reflect.Value, src reflect.Value) { 2512 for i := 0; i < dst.NumField(); i++ { 2513 d := dst.Field(i) 2514 s := src.Field(i) 2515 if c, ok := s.Addr().Interface().(*StatCounter); ok { 2516 d.Addr().Interface().(*StatCounter).IncrementBy(c.Value()) 2517 } else { 2518 clone(d, s) 2519 } 2520 } 2521 } 2522 2523 // String implements the fmt.Stringer interface. 2524 func (a Address) String() string { 2525 switch l := a.Len(); l { 2526 case 4: 2527 return fmt.Sprintf("%d.%d.%d.%d", int(a.addr[0]), int(a.addr[1]), int(a.addr[2]), int(a.addr[3])) 2528 case 16: 2529 // Find the longest subsequence of hexadecimal zeros. 
2530 start, end := -1, -1 2531 for i := 0; i < a.Len(); i += 2 { 2532 j := i 2533 for j < a.Len() && a.addr[j] == 0 && a.addr[j+1] == 0 { 2534 j += 2 2535 } 2536 if j > i+2 && j-i > end-start { 2537 start, end = i, j 2538 } 2539 } 2540 2541 var b strings.Builder 2542 for i := 0; i < a.Len(); i += 2 { 2543 if i == start { 2544 b.WriteString("::") 2545 i = end 2546 if end >= a.Len() { 2547 break 2548 } 2549 } else if i > 0 { 2550 b.WriteByte(':') 2551 } 2552 v := uint16(a.addr[i+0])<<8 | uint16(a.addr[i+1]) 2553 if v == 0 { 2554 b.WriteByte('0') 2555 } else { 2556 const digits = "0123456789abcdef" 2557 for i := uint(3); i < 4; i-- { 2558 if v := v >> (i * 4); v != 0 { 2559 b.WriteByte(digits[v&0xf]) 2560 } 2561 } 2562 } 2563 } 2564 return b.String() 2565 default: 2566 return fmt.Sprintf("%x", a.addr[:l]) 2567 } 2568 } 2569 2570 // To4 converts the IPv4 address to a 4-byte representation. 2571 // If the address is not an IPv4 address, To4 returns the empty Address. 2572 func (a Address) To4() Address { 2573 const ( 2574 ipv4len = 4 2575 ipv6len = 16 2576 ) 2577 if a.Len() == ipv4len { 2578 return a 2579 } 2580 if a.Len() == ipv6len && 2581 isZeros(a.addr[:10]) && 2582 a.addr[10] == 0xff && 2583 a.addr[11] == 0xff { 2584 return AddrFrom4Slice(a.addr[12:16]) 2585 } 2586 return Address{} 2587 } 2588 2589 // isZeros reports whether addr is all zeros. 2590 func isZeros(addr []byte) bool { 2591 for _, b := range addr { 2592 if b != 0 { 2593 return false 2594 } 2595 } 2596 return true 2597 } 2598 2599 // LinkAddress is a byte slice cast as a string that represents a link address. 2600 // It is typically a 6-byte MAC address. 2601 type LinkAddress string 2602 2603 // String implements the fmt.Stringer interface. 
2604 func (a LinkAddress) String() string { 2605 switch len(a) { 2606 case 6: 2607 return fmt.Sprintf("%02x:%02x:%02x:%02x:%02x:%02x", a[0], a[1], a[2], a[3], a[4], a[5]) 2608 default: 2609 return fmt.Sprintf("%x", []byte(a)) 2610 } 2611 } 2612 2613 // ParseMACAddress parses an IEEE 802 address. 2614 // 2615 // It must be in the format aa:bb:cc:dd:ee:ff or aa-bb-cc-dd-ee-ff. 2616 func ParseMACAddress(s string) (LinkAddress, error) { 2617 parts := strings.FieldsFunc(s, func(c rune) bool { 2618 return c == ':' || c == '-' 2619 }) 2620 if len(parts) != 6 { 2621 return "", fmt.Errorf("inconsistent parts: %s", s) 2622 } 2623 addr := make([]byte, 0, len(parts)) 2624 for _, part := range parts { 2625 u, err := strconv.ParseUint(part, 16, 8) 2626 if err != nil { 2627 return "", fmt.Errorf("invalid hex digits: %s", s) 2628 } 2629 addr = append(addr, byte(u)) 2630 } 2631 return LinkAddress(addr), nil 2632 } 2633 2634 // AddressWithPrefix is an address with its subnet prefix length. 2635 // 2636 // +stateify savable 2637 type AddressWithPrefix struct { 2638 // Address is a network address. 2639 Address Address 2640 2641 // PrefixLen is the subnet prefix length. 2642 PrefixLen int 2643 } 2644 2645 // String implements the fmt.Stringer interface. 2646 func (a AddressWithPrefix) String() string { 2647 return fmt.Sprintf("%s/%d", a.Address, a.PrefixLen) 2648 } 2649 2650 // Subnet converts the address and prefix into a Subnet value and returns it. 
2651 func (a AddressWithPrefix) Subnet() Subnet { 2652 addrLen := a.Address.length 2653 if a.PrefixLen <= 0 { 2654 return Subnet{ 2655 address: AddrFromSlice(bytes.Repeat([]byte{0}, addrLen)), 2656 mask: MaskFromBytes(bytes.Repeat([]byte{0}, addrLen)), 2657 } 2658 } 2659 if a.PrefixLen >= addrLen*8 { 2660 return Subnet{ 2661 address: a.Address, 2662 mask: MaskFromBytes(bytes.Repeat([]byte{0xff}, addrLen)), 2663 } 2664 } 2665 2666 sa := make([]byte, addrLen) 2667 sm := make([]byte, addrLen) 2668 n := uint(a.PrefixLen) 2669 for i := 0; i < addrLen; i++ { 2670 if n >= 8 { 2671 sa[i] = a.Address.addr[i] 2672 sm[i] = 0xff 2673 n -= 8 2674 continue 2675 } 2676 sm[i] = ^byte(0xff >> n) 2677 sa[i] = a.Address.addr[i] & sm[i] 2678 n = 0 2679 } 2680 2681 // For extra caution, call NewSubnet rather than directly creating the Subnet 2682 // value. If that fails it indicates a serious bug in this code, so panic is 2683 // in order. 2684 s, err := NewSubnet(AddrFromSlice(sa), MaskFromBytes(sm)) 2685 if err != nil { 2686 panic("invalid subnet: " + err.Error()) 2687 } 2688 return s 2689 } 2690 2691 // ProtocolAddress is an address and the network protocol it is associated 2692 // with. 2693 type ProtocolAddress struct { 2694 // Protocol is the protocol of the address. 2695 Protocol NetworkProtocolNumber 2696 2697 // AddressWithPrefix is a network address with its subnet prefix length. 2698 AddressWithPrefix AddressWithPrefix 2699 } 2700 2701 var ( 2702 // danglingEndpointsMu protects access to danglingEndpoints. 2703 danglingEndpointsMu sync.Mutex 2704 2705 // danglingEndpoints tracks all dangling endpoints no longer owned by the app. 2706 danglingEndpoints = make(map[Endpoint]struct{}) 2707 ) 2708 2709 // GetDanglingEndpoints returns all dangling endpoints. 
2710 func GetDanglingEndpoints() []Endpoint { 2711 danglingEndpointsMu.Lock() 2712 es := make([]Endpoint, 0, len(danglingEndpoints)) 2713 for e := range danglingEndpoints { 2714 es = append(es, e) 2715 } 2716 danglingEndpointsMu.Unlock() 2717 return es 2718 } 2719 2720 // ReleaseDanglingEndpoints clears out all all reference counted objects held by 2721 // dangling endpoints. 2722 func ReleaseDanglingEndpoints() { 2723 // Get the dangling endpoints first to avoid locking around Release(), which 2724 // can cause a lock inversion with endpoint.mu and danglingEndpointsMu. 2725 // Calling Release on a dangling endpoint that has been deleted is a noop. 2726 eps := GetDanglingEndpoints() 2727 for _, ep := range eps { 2728 ep.Abort() 2729 } 2730 } 2731 2732 // AddDanglingEndpoint adds a dangling endpoint. 2733 func AddDanglingEndpoint(e Endpoint) { 2734 danglingEndpointsMu.Lock() 2735 danglingEndpoints[e] = struct{}{} 2736 danglingEndpointsMu.Unlock() 2737 } 2738 2739 // DeleteDanglingEndpoint removes a dangling endpoint. 2740 func DeleteDanglingEndpoint(e Endpoint) { 2741 danglingEndpointsMu.Lock() 2742 delete(danglingEndpoints, e) 2743 danglingEndpointsMu.Unlock() 2744 } 2745 2746 // AsyncLoading is the global barrier for asynchronous endpoint loading 2747 // activities. 2748 var AsyncLoading sync.WaitGroup