github.com/FlowerWrong/netstack@v0.0.0-20191009141956-e5848263af28/tcpip/tcpip.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package tcpip provides the interfaces and related types that users of the 16 // tcpip stack will use in order to create endpoints used to send and receive 17 // data over the network stack. 18 // 19 // The starting point is the creation and configuration of a stack. A stack can 20 // be created by calling the New() function of the tcpip/stack/stack package; 21 // configuring a stack involves creating NICs (via calls to Stack.CreateNIC()), 22 // adding network addresses (via calls to Stack.AddAddress()), and 23 // setting a route table (via a call to Stack.SetRouteTable()). 24 // 25 // Once a stack is configured, endpoints can be created by calling 26 // Stack.NewEndpoint(). Such endpoints can be used to send/receive data, connect 27 // to peers, listen for connections, accept connections, etc., depending on the 28 // transport protocol selected. 29 package tcpip 30 31 import ( 32 "errors" 33 "fmt" 34 "math/bits" 35 "reflect" 36 "strconv" 37 "strings" 38 "sync" 39 "sync/atomic" 40 "time" 41 42 "github.com/FlowerWrong/netstack/tcpip/buffer" 43 "github.com/FlowerWrong/netstack/tcpip/iptables" 44 "github.com/FlowerWrong/netstack/waiter" 45 ) 46 47 // Error represents an error in the netstack error space. 
Using a special type 48 // ensures that errors outside of this space are not accidentally introduced. 49 // 50 // Note: to support save / restore, it is important that all tcpip errors have 51 // distinct error messages. 52 type Error struct { 53 msg string 54 55 ignoreStats bool 56 } 57 58 // String implements fmt.Stringer.String. 59 func (e *Error) String() string { 60 if e == nil { 61 return "<nil>" 62 } 63 return e.msg 64 } 65 66 // IgnoreStats indicates whether this error type should be included in failure 67 // counts in tcpip.Stats structs. 68 func (e *Error) IgnoreStats() bool { 69 return e.ignoreStats 70 } 71 72 // Errors that can be returned by the network stack. 73 var ( 74 ErrUnknownProtocol = &Error{msg: "unknown protocol"} 75 ErrUnknownNICID = &Error{msg: "unknown nic id"} 76 ErrUnknownDevice = &Error{msg: "unknown device"} 77 ErrUnknownProtocolOption = &Error{msg: "unknown option for protocol"} 78 ErrDuplicateNICID = &Error{msg: "duplicate nic id"} 79 ErrDuplicateAddress = &Error{msg: "duplicate address"} 80 ErrNoRoute = &Error{msg: "no route"} 81 ErrBadLinkEndpoint = &Error{msg: "bad link layer endpoint"} 82 ErrAlreadyBound = &Error{msg: "endpoint already bound", ignoreStats: true} 83 ErrInvalidEndpointState = &Error{msg: "endpoint is in invalid state"} 84 ErrAlreadyConnecting = &Error{msg: "endpoint is already connecting", ignoreStats: true} 85 ErrAlreadyConnected = &Error{msg: "endpoint is already connected", ignoreStats: true} 86 ErrNoPortAvailable = &Error{msg: "no ports are available"} 87 ErrPortInUse = &Error{msg: "port is in use"} 88 ErrBadLocalAddress = &Error{msg: "bad local address"} 89 ErrClosedForSend = &Error{msg: "endpoint is closed for send"} 90 ErrClosedForReceive = &Error{msg: "endpoint is closed for receive"} 91 ErrWouldBlock = &Error{msg: "operation would block", ignoreStats: true} 92 ErrConnectionRefused = &Error{msg: "connection was refused"} 93 ErrTimeout = &Error{msg: "operation timed out"} 94 ErrAborted = &Error{msg: 
"operation aborted"} 95 ErrConnectStarted = &Error{msg: "connection attempt started", ignoreStats: true} 96 ErrDestinationRequired = &Error{msg: "destination address is required"} 97 ErrNotSupported = &Error{msg: "operation not supported"} 98 ErrQueueSizeNotSupported = &Error{msg: "queue size querying not supported"} 99 ErrNotConnected = &Error{msg: "endpoint not connected"} 100 ErrConnectionReset = &Error{msg: "connection reset by peer"} 101 ErrConnectionAborted = &Error{msg: "connection aborted"} 102 ErrNoSuchFile = &Error{msg: "no such file"} 103 ErrInvalidOptionValue = &Error{msg: "invalid option value specified"} 104 ErrNoLinkAddress = &Error{msg: "no remote link address"} 105 ErrBadAddress = &Error{msg: "bad address"} 106 ErrNetworkUnreachable = &Error{msg: "network is unreachable"} 107 ErrMessageTooLong = &Error{msg: "message too long"} 108 ErrNoBufferSpace = &Error{msg: "no buffer space available"} 109 ErrBroadcastDisabled = &Error{msg: "broadcast socket option disabled"} 110 ErrNotPermitted = &Error{msg: "operation not permitted"} 111 ErrAddressFamilyNotSupported = &Error{msg: "address family not supported by protocol"} 112 ) 113 114 // Errors related to Subnet 115 var ( 116 errSubnetLengthMismatch = errors.New("subnet length of address and mask differ") 117 errSubnetAddressMasked = errors.New("subnet address has bits set outside the mask") 118 ) 119 120 // ErrSaveRejection indicates a failed save due to unsupported networking state. 121 // This type of errors is only used for save logic. 122 type ErrSaveRejection struct { 123 Err error 124 } 125 126 // Error returns a sensible description of the save rejection error. 127 func (e ErrSaveRejection) Error() string { 128 return "save rejected due to unsupported networking state: " + e.Err.Error() 129 } 130 131 // A Clock provides the current time. 132 // 133 // Times returned by a Clock should always be used for application-visible 134 // time. 
Only monotonic times should be used for netstack internal timekeeping. 135 type Clock interface { 136 // NowNanoseconds returns the current real time as a number of 137 // nanoseconds since the Unix epoch. 138 NowNanoseconds() int64 139 140 // NowMonotonic returns a monotonic time value. 141 NowMonotonic() int64 142 } 143 144 // Address is a byte slice cast as a string that represents the address of a 145 // network node. Or, in the case of unix endpoints, it may represent a path. 146 type Address string 147 148 // AddressMask is a bitmask for an address. 149 type AddressMask string 150 151 // String implements Stringer. 152 func (m AddressMask) String() string { 153 return Address(m).String() 154 } 155 156 // Prefix returns the number of bits before the first host bit. 157 func (m AddressMask) Prefix() int { 158 p := 0 159 for _, b := range []byte(m) { 160 p += bits.LeadingZeros8(^b) 161 } 162 return p 163 } 164 165 // Subnet is a subnet defined by its address and mask. 166 type Subnet struct { 167 address Address 168 mask AddressMask 169 } 170 171 // NewSubnet creates a new Subnet, checking that the address and mask are the same length. 172 func NewSubnet(a Address, m AddressMask) (Subnet, error) { 173 if len(a) != len(m) { 174 return Subnet{}, errSubnetLengthMismatch 175 } 176 for i := 0; i < len(a); i++ { 177 if a[i]&^m[i] != 0 { 178 return Subnet{}, errSubnetAddressMasked 179 } 180 } 181 return Subnet{a, m}, nil 182 } 183 184 // String implements Stringer. 185 func (s Subnet) String() string { 186 return fmt.Sprintf("%s/%d", s.ID(), s.Prefix()) 187 } 188 189 // Contains returns true iff the address is of the same length and matches the 190 // subnet address and mask. 191 func (s *Subnet) Contains(a Address) bool { 192 if len(a) != len(s.address) { 193 return false 194 } 195 for i := 0; i < len(a); i++ { 196 if a[i]&s.mask[i] != s.address[i] { 197 return false 198 } 199 } 200 return true 201 } 202 203 // ID returns the subnet ID. 
204 func (s *Subnet) ID() Address { 205 return s.address 206 } 207 208 // Bits returns the number of ones (network bits) and zeros (host bits) in the 209 // subnet mask. 210 func (s *Subnet) Bits() (ones int, zeros int) { 211 ones = s.mask.Prefix() 212 return ones, len(s.mask)*8 - ones 213 } 214 215 // Prefix returns the number of bits before the first host bit. 216 func (s *Subnet) Prefix() int { 217 return s.mask.Prefix() 218 } 219 220 // Mask returns the subnet mask. 221 func (s *Subnet) Mask() AddressMask { 222 return s.mask 223 } 224 225 // Broadcast returns the subnet's broadcast address. 226 func (s *Subnet) Broadcast() Address { 227 addr := []byte(s.address) 228 for i := range addr { 229 addr[i] |= ^s.mask[i] 230 } 231 return Address(addr) 232 } 233 234 // NICID is a number that uniquely identifies a NIC. 235 type NICID int32 236 237 // ShutdownFlags represents flags that can be passed to the Shutdown() method 238 // of the Endpoint interface. 239 type ShutdownFlags int 240 241 // Values of the flags that can be passed to the Shutdown() method. They can 242 // be OR'ed together. 243 const ( 244 ShutdownRead ShutdownFlags = 1 << iota 245 ShutdownWrite 246 ) 247 248 // FullAddress represents a full transport node address, as required by the 249 // Connect() and Bind() methods. 250 // 251 // +stateify savable 252 type FullAddress struct { 253 // NIC is the ID of the NIC this address refers to. 254 // 255 // This may not be used by all endpoint types. 256 NIC NICID 257 258 // Addr is the network address. 259 Addr Address 260 261 // Port is the transport port. 262 // 263 // This may not be used by all endpoint types. 264 Port uint16 265 } 266 267 // Payloader is an interface that provides data. 268 // 269 // This interface allows the endpoint to request the amount of data it needs 270 // based on internal buffers without exposing them. 271 type Payloader interface { 272 // FullPayload returns all available bytes. 
273 FullPayload() ([]byte, *Error) 274 275 // Payload returns a slice containing at most size bytes. 276 Payload(size int) ([]byte, *Error) 277 } 278 279 // SlicePayload implements Payloader for slices. 280 // 281 // This is typically used for tests. 282 type SlicePayload []byte 283 284 // FullPayload implements Payloader.FullPayload. 285 func (s SlicePayload) FullPayload() ([]byte, *Error) { 286 return s, nil 287 } 288 289 // Payload implements Payloader.Payload. 290 func (s SlicePayload) Payload(size int) ([]byte, *Error) { 291 if size > len(s) { 292 size = len(s) 293 } 294 return s[:size], nil 295 } 296 297 // A ControlMessages contains socket control messages for IP sockets. 298 // 299 // +stateify savable 300 type ControlMessages struct { 301 // HasTimestamp indicates whether Timestamp is valid/set. 302 HasTimestamp bool 303 304 // Timestamp is the time (in ns) that the last packed used to create 305 // the read data was received. 306 Timestamp int64 307 308 // HasInq indicates whether Inq is valid/set. 309 HasInq bool 310 311 // Inq is the number of bytes ready to be received. 312 Inq int32 313 } 314 315 // Endpoint is the interface implemented by transport protocols (e.g., tcp, udp) 316 // that exposes functionality like read, write, connect, etc. to users of the 317 // networking stack. 318 type Endpoint interface { 319 // Close puts the endpoint in a closed state and frees all resources 320 // associated with it. 321 Close() 322 323 // Read reads data from the endpoint and optionally returns the sender. 324 // 325 // This method does not block if there is no data pending. It will also 326 // either return an error or data, never both. 327 Read(*FullAddress) (buffer.View, ControlMessages, *Error) 328 329 // Write writes data to the endpoint's peer. This method does not block if 330 // the data cannot be written. 331 // 332 // Unlike io.Writer.Write, Endpoint.Write transfers ownership of any bytes 333 // successfully written to the Endpoint. 
That is, if a call to 334 // Write(SlicePayload{data}) returns (n, err), it may retain data[:n], and 335 // the caller should not use data[:n] after Write returns. 336 // 337 // Note that unlike io.Writer.Write, it is not an error for Write to 338 // perform a partial write (if n > 0, no error may be returned). Only 339 // stream (TCP) Endpoints may return partial writes, and even then only 340 // in the case where writing additional data would block. Other Endpoints 341 // will either write the entire message or return an error. 342 // 343 // For UDP and Ping sockets if address resolution is required, 344 // ErrNoLinkAddress and a notification channel is returned for the caller to 345 // block. Channel is closed once address resolution is complete (success or 346 // not). The channel is only non-nil in this case. 347 Write(Payloader, WriteOptions) (int64, <-chan struct{}, *Error) 348 349 // Peek reads data without consuming it from the endpoint. 350 // 351 // This method does not block if there is no data pending. 352 Peek([][]byte) (int64, ControlMessages, *Error) 353 354 // Connect connects the endpoint to its peer. Specifying a NIC is 355 // optional. 356 // 357 // There are three classes of return values: 358 // nil -- the attempt to connect succeeded. 359 // ErrConnectStarted/ErrAlreadyConnecting -- the connect attempt started 360 // but hasn't completed yet. In this case, the caller must call Connect 361 // or GetSockOpt(ErrorOption) when the endpoint becomes writable to 362 // get the actual result. The first call to Connect after the socket has 363 // connected returns nil. Calling connect again results in ErrAlreadyConnected. 364 // Anything else -- the attempt to connect failed. 365 // 366 // If address.Addr is empty, this means that Enpoint has to be 367 // disconnected if this is supported, otherwise 368 // ErrAddressFamilyNotSupported must be returned. 
369 Connect(address FullAddress) *Error 370 371 // Disconnect disconnects the endpoint from its peer. 372 Disconnect() *Error 373 374 // Shutdown closes the read and/or write end of the endpoint connection 375 // to its peer. 376 Shutdown(flags ShutdownFlags) *Error 377 378 // Listen puts the endpoint in "listen" mode, which allows it to accept 379 // new connections. 380 Listen(backlog int) *Error 381 382 // Accept returns a new endpoint if a peer has established a connection 383 // to an endpoint previously set to listen mode. This method does not 384 // block if no new connections are available. 385 // 386 // The returned Queue is the wait queue for the newly created endpoint. 387 Accept() (Endpoint, *waiter.Queue, *Error) 388 389 // Bind binds the endpoint to a specific local address and port. 390 // Specifying a NIC is optional. 391 Bind(address FullAddress) *Error 392 393 // GetLocalAddress returns the address to which the endpoint is bound. 394 GetLocalAddress() (FullAddress, *Error) 395 396 // GetRemoteAddress returns the address to which the endpoint is 397 // connected. 398 GetRemoteAddress() (FullAddress, *Error) 399 400 // Readiness returns the current readiness of the endpoint. For example, 401 // if waiter.EventIn is set, the endpoint is immediately readable. 402 Readiness(mask waiter.EventMask) waiter.EventMask 403 404 // SetSockOpt sets a socket option. opt should be one of the *Option types. 405 SetSockOpt(opt interface{}) *Error 406 407 // SetSockOptInt sets a socket option, for simple cases where a value 408 // has the int type. 409 SetSockOptInt(opt SockOpt, v int) *Error 410 411 // GetSockOpt gets a socket option. opt should be a pointer to one of the 412 // *Option types. 413 GetSockOpt(opt interface{}) *Error 414 415 // GetSockOptInt gets a socket option for simple cases where a return 416 // value has the int type. 417 GetSockOptInt(SockOpt) (int, *Error) 418 419 // State returns a socket's lifecycle state. 
The returned value is 420 // protocol-specific and is primarily used for diagnostics. 421 State() uint32 422 423 // ModerateRecvBuf should be called everytime data is copied to the user 424 // space. This allows for dynamic tuning of recv buffer space for a 425 // given socket. 426 // 427 // NOTE: This method is a no-op for sockets other than TCP. 428 ModerateRecvBuf(copied int) 429 430 // IPTables returns the iptables for this endpoint's stack. 431 IPTables() (iptables.IPTables, error) 432 } 433 434 // WriteOptions contains options for Endpoint.Write. 435 type WriteOptions struct { 436 // If To is not nil, write to the given address instead of the endpoint's 437 // peer. 438 To *FullAddress 439 440 // More has the same semantics as Linux's MSG_MORE. 441 More bool 442 443 // EndOfRecord has the same semantics as Linux's MSG_EOR. 444 EndOfRecord bool 445 446 // Atomic means that all data fetched from Payloader must be written to the 447 // endpoint. If Atomic is false, then data fetched from the Payloader may be 448 // discarded if available endpoint buffer space is unsufficient. 449 Atomic bool 450 } 451 452 // SockOpt represents socket options which values have the int type. 453 type SockOpt int 454 455 const ( 456 // ReceiveQueueSizeOption is used in GetSockOptInt to specify that the 457 // number of unread bytes in the input buffer should be returned. 458 ReceiveQueueSizeOption SockOpt = iota 459 460 // SendBufferSizeOption is used by SetSockOptInt/GetSockOptInt to 461 // specify the send buffer size option. 462 SendBufferSizeOption 463 464 // ReceiveBufferSizeOption is used by SetSockOptInt/GetSockOptInt to 465 // specify the receive buffer size option. 466 ReceiveBufferSizeOption 467 468 // SendQueueSizeOption is used in GetSockOptInt to specify that the 469 // number of unread bytes in the output buffer should be returned. 470 SendQueueSizeOption 471 472 // TODO(b/137664753): convert all int socket options to be handled via 473 // GetSockOptInt. 
474 ) 475 476 // ErrorOption is used in GetSockOpt to specify that the last error reported by 477 // the endpoint should be cleared and returned. 478 type ErrorOption struct{} 479 480 // V6OnlyOption is used by SetSockOpt/GetSockOpt to specify whether an IPv6 481 // socket is to be restricted to sending and receiving IPv6 packets only. 482 type V6OnlyOption int 483 484 // DelayOption is used by SetSockOpt/GetSockOpt to specify if data should be 485 // sent out immediately by the transport protocol. For TCP, it determines if the 486 // Nagle algorithm is on or off. 487 type DelayOption int 488 489 // CorkOption is used by SetSockOpt/GetSockOpt to specify if data should be 490 // held until segments are full by the TCP transport protocol. 491 type CorkOption int 492 493 // ReuseAddressOption is used by SetSockOpt/GetSockOpt to specify whether Bind() 494 // should allow reuse of local address. 495 type ReuseAddressOption int 496 497 // ReusePortOption is used by SetSockOpt/GetSockOpt to permit multiple sockets 498 // to be bound to an identical socket address. 499 type ReusePortOption int 500 501 // BindToDeviceOption is used by SetSockOpt/GetSockOpt to specify that sockets 502 // should bind only on a specific NIC. 503 type BindToDeviceOption string 504 505 // QuickAckOption is stubbed out in SetSockOpt/GetSockOpt. 506 type QuickAckOption int 507 508 // PasscredOption is used by SetSockOpt/GetSockOpt to specify whether 509 // SCM_CREDENTIALS socket control messages are enabled. 510 // 511 // Only supported on Unix sockets. 512 type PasscredOption int 513 514 // TCPInfoOption is used by GetSockOpt to expose TCP statistics. 515 // 516 // TODO(b/64800844): Add and populate stat fields. 517 type TCPInfoOption struct { 518 RTT time.Duration 519 RTTVar time.Duration 520 } 521 522 // KeepaliveEnabledOption is used by SetSockOpt/GetSockOpt to specify whether 523 // TCP keepalive is enabled for this socket. 
type KeepaliveEnabledOption int

// KeepaliveIdleOption is used by SetSockOpt/GetSockOpt to specify the time a
// connection must remain idle before the first TCP keepalive packet is sent.
// Once this time is reached, KeepaliveIntervalOption is used instead.
type KeepaliveIdleOption time.Duration

// KeepaliveIntervalOption is used by SetSockOpt/GetSockOpt to specify the
// interval between sending TCP keepalive packets.
type KeepaliveIntervalOption time.Duration

// KeepaliveCountOption is used by SetSockOpt/GetSockOpt to specify the number
// of un-ACKed TCP keepalives that will be sent before the connection is
// closed.
type KeepaliveCountOption int

// CongestionControlOption is used by SetSockOpt/GetSockOpt to set/get
// the current congestion control algorithm.
type CongestionControlOption string

// AvailableCongestionControlOption is used to query the supported congestion
// control algorithms.
type AvailableCongestionControlOption string

// ModerateReceiveBufferOption allows the caller to enable/disable TCP receive
// buffer moderation.
type ModerateReceiveBufferOption bool

// MaxSegOption is used by SetSockOpt/GetSockOpt to set/get the current
// Maximum Segment Size (MSS) value as specified using the TCP_MAXSEG option.
type MaxSegOption int

// TTLOption is used by SetSockOpt/GetSockOpt to control the default TTL/hop
// limit value for unicast messages. The default is protocol specific.
//
// A zero value indicates the default.
type TTLOption uint8

// MulticastTTLOption is used by SetSockOpt/GetSockOpt to control the default
// TTL value for multicast messages. The default is 1.
type MulticastTTLOption uint8

// MulticastInterfaceOption is used by SetSockOpt/GetSockOpt to specify a
// default interface for multicast.
568 type MulticastInterfaceOption struct { 569 NIC NICID 570 InterfaceAddr Address 571 } 572 573 // MulticastLoopOption is used by SetSockOpt/GetSockOpt to specify whether 574 // multicast packets sent over a non-loopback interface will be looped back. 575 type MulticastLoopOption bool 576 577 // MembershipOption is used by SetSockOpt/GetSockOpt as an argument to 578 // AddMembershipOption and RemoveMembershipOption. 579 type MembershipOption struct { 580 NIC NICID 581 InterfaceAddr Address 582 MulticastAddr Address 583 } 584 585 // AddMembershipOption is used by SetSockOpt/GetSockOpt to join a multicast 586 // group identified by the given multicast address, on the interface matching 587 // the given interface address. 588 type AddMembershipOption MembershipOption 589 590 // RemoveMembershipOption is used by SetSockOpt/GetSockOpt to leave a multicast 591 // group identified by the given multicast address, on the interface matching 592 // the given interface address. 593 type RemoveMembershipOption MembershipOption 594 595 // OutOfBandInlineOption is used by SetSockOpt/GetSockOpt to specify whether 596 // TCP out-of-band data is delivered along with the normal in-band data. 597 type OutOfBandInlineOption int 598 599 // BroadcastOption is used by SetSockOpt/GetSockOpt to specify whether 600 // datagram sockets are allowed to send packets to a broadcast address. 601 type BroadcastOption int 602 603 // DefaultTTLOption is used by stack.(*Stack).NetworkProtocolOption to specify 604 // a default TTL. 605 type DefaultTTLOption uint8 606 607 // Route is a row in the routing table. It specifies through which NIC (and 608 // gateway) sets of packets should be routed. A row is considered viable if the 609 // masked target address matches the destination address in the row. 610 type Route struct { 611 // Destination must contain the target address for this row to be viable. 612 Destination Subnet 613 614 // Gateway is the gateway to be used if this row is viable. 
615 Gateway Address 616 617 // NIC is the id of the nic to be used if this row is viable. 618 NIC NICID 619 } 620 621 // String implements the fmt.Stringer interface. 622 func (r Route) String() string { 623 var out strings.Builder 624 fmt.Fprintf(&out, "%s", r.Destination) 625 if len(r.Gateway) > 0 { 626 fmt.Fprintf(&out, " via %s", r.Gateway) 627 } 628 fmt.Fprintf(&out, " nic %d", r.NIC) 629 return out.String() 630 } 631 632 // TransportProtocolNumber is the number of a transport protocol. 633 type TransportProtocolNumber uint32 634 635 // NetworkProtocolNumber is the number of a network protocol. 636 type NetworkProtocolNumber uint32 637 638 // A StatCounter keeps track of a statistic. 639 type StatCounter struct { 640 count uint64 641 } 642 643 // Increment adds one to the counter. 644 func (s *StatCounter) Increment() { 645 s.IncrementBy(1) 646 } 647 648 // Value returns the current value of the counter. 649 func (s *StatCounter) Value() uint64 { 650 return atomic.LoadUint64(&s.count) 651 } 652 653 // IncrementBy increments the counter by v. 654 func (s *StatCounter) IncrementBy(v uint64) { 655 atomic.AddUint64(&s.count, v) 656 } 657 658 func (s *StatCounter) String() string { 659 return strconv.FormatUint(s.Value(), 10) 660 } 661 662 // ICMPv4PacketStats enumerates counts for all ICMPv4 packet types. 663 type ICMPv4PacketStats struct { 664 // Echo is the total number of ICMPv4 echo packets counted. 665 Echo *StatCounter 666 667 // EchoReply is the total number of ICMPv4 echo reply packets counted. 668 EchoReply *StatCounter 669 670 // DstUnreachable is the total number of ICMPv4 destination unreachable 671 // packets counted. 672 DstUnreachable *StatCounter 673 674 // SrcQuench is the total number of ICMPv4 source quench packets 675 // counted. 676 SrcQuench *StatCounter 677 678 // Redirect is the total number of ICMPv4 redirect packets counted. 
679 Redirect *StatCounter 680 681 // TimeExceeded is the total number of ICMPv4 time exceeded packets 682 // counted. 683 TimeExceeded *StatCounter 684 685 // ParamProblem is the total number of ICMPv4 parameter problem packets 686 // counted. 687 ParamProblem *StatCounter 688 689 // Timestamp is the total number of ICMPv4 timestamp packets counted. 690 Timestamp *StatCounter 691 692 // TimestampReply is the total number of ICMPv4 timestamp reply packets 693 // counted. 694 TimestampReply *StatCounter 695 696 // InfoRequest is the total number of ICMPv4 information request 697 // packets counted. 698 InfoRequest *StatCounter 699 700 // InfoReply is the total number of ICMPv4 information reply packets 701 // counted. 702 InfoReply *StatCounter 703 } 704 705 // ICMPv6PacketStats enumerates counts for all ICMPv6 packet types. 706 type ICMPv6PacketStats struct { 707 // EchoRequest is the total number of ICMPv6 echo request packets 708 // counted. 709 EchoRequest *StatCounter 710 711 // EchoReply is the total number of ICMPv6 echo reply packets counted. 712 EchoReply *StatCounter 713 714 // DstUnreachable is the total number of ICMPv6 destination unreachable 715 // packets counted. 716 DstUnreachable *StatCounter 717 718 // PacketTooBig is the total number of ICMPv6 packet too big packets 719 // counted. 720 PacketTooBig *StatCounter 721 722 // TimeExceeded is the total number of ICMPv6 time exceeded packets 723 // counted. 724 TimeExceeded *StatCounter 725 726 // ParamProblem is the total number of ICMPv6 parameter problem packets 727 // counted. 728 ParamProblem *StatCounter 729 730 // RouterSolicit is the total number of ICMPv6 router solicit packets 731 // counted. 732 RouterSolicit *StatCounter 733 734 // RouterAdvert is the total number of ICMPv6 router advert packets 735 // counted. 736 RouterAdvert *StatCounter 737 738 // NeighborSolicit is the total number of ICMPv6 neighbor solicit 739 // packets counted. 
740 NeighborSolicit *StatCounter 741 742 // NeighborAdvert is the total number of ICMPv6 neighbor advert packets 743 // counted. 744 NeighborAdvert *StatCounter 745 746 // RedirectMsg is the total number of ICMPv6 redirect message packets 747 // counted. 748 RedirectMsg *StatCounter 749 } 750 751 // ICMPv4SentPacketStats collects outbound ICMPv4-specific stats. 752 type ICMPv4SentPacketStats struct { 753 ICMPv4PacketStats 754 755 // Dropped is the total number of ICMPv4 packets dropped due to link 756 // layer errors. 757 Dropped *StatCounter 758 759 // RateLimited is the total number of ICMPv6 packets dropped due to 760 // rate limit being exceeded. 761 RateLimited *StatCounter 762 } 763 764 // ICMPv4ReceivedPacketStats collects inbound ICMPv4-specific stats. 765 type ICMPv4ReceivedPacketStats struct { 766 ICMPv4PacketStats 767 768 // Invalid is the total number of ICMPv4 packets received that the 769 // transport layer could not parse. 770 Invalid *StatCounter 771 } 772 773 // ICMPv6SentPacketStats collects outbound ICMPv6-specific stats. 774 type ICMPv6SentPacketStats struct { 775 ICMPv6PacketStats 776 777 // Dropped is the total number of ICMPv6 packets dropped due to link 778 // layer errors. 779 Dropped *StatCounter 780 781 // RateLimited is the total number of ICMPv6 packets dropped due to 782 // rate limit being exceeded. 783 RateLimited *StatCounter 784 } 785 786 // ICMPv6ReceivedPacketStats collects inbound ICMPv6-specific stats. 787 type ICMPv6ReceivedPacketStats struct { 788 ICMPv6PacketStats 789 790 // Invalid is the total number of ICMPv6 packets received that the 791 // transport layer could not parse. 792 Invalid *StatCounter 793 } 794 795 // ICMPStats collects ICMP-specific stats (both v4 and v6). 796 type ICMPStats struct { 797 // ICMPv4SentPacketStats contains counts of sent packets by ICMPv4 packet type 798 // and a single count of packets which failed to write to the link 799 // layer. 
V4PacketsSent ICMPv4SentPacketStats

	// V4PacketsReceived contains counts of received packets by ICMPv4
	// packet type and a single count of invalid packets received.
	V4PacketsReceived ICMPv4ReceivedPacketStats

	// V6PacketsSent contains counts of sent packets by ICMPv6 packet type
	// and a single count of packets which failed to write to the link
	// layer.
	V6PacketsSent ICMPv6SentPacketStats

	// V6PacketsReceived contains counts of received packets by ICMPv6
	// packet type and a single count of invalid packets received.
	V6PacketsReceived ICMPv6ReceivedPacketStats
}

// IPStats collects IP-specific stats (both v4 and v6).
type IPStats struct {
	// PacketsReceived is the total number of IP packets received from the
	// link layer in nic.DeliverNetworkPacket.
	PacketsReceived *StatCounter

	// InvalidAddressesReceived is the total number of IP packets received
	// with an unknown or invalid destination address.
	InvalidAddressesReceived *StatCounter

	// PacketsDelivered is the total number of incoming IP packets that
	// are successfully delivered to the transport layer via HandlePacket.
	PacketsDelivered *StatCounter

	// PacketsSent is the total number of IP packets sent via WritePacket.
	PacketsSent *StatCounter

	// OutgoingPacketErrors is the total number of IP packets which failed
	// to write to a link-layer endpoint.
	OutgoingPacketErrors *StatCounter
}

// TCPStats collects TCP-specific stats.
type TCPStats struct {
	// ActiveConnectionOpenings is the number of connections opened
	// successfully via Connect.
	ActiveConnectionOpenings *StatCounter

	// PassiveConnectionOpenings is the number of connections opened
	// successfully via Listen.
	PassiveConnectionOpenings *StatCounter

	// ListenOverflowSynDrop is the number of times the listen queue overflowed
	// and a SYN was dropped.
	ListenOverflowSynDrop *StatCounter

	// ListenOverflowAckDrop is the number of times the final ACK
	// in the handshake was dropped due to overflow.
	ListenOverflowAckDrop *StatCounter

	// ListenOverflowSynCookieSent is the number of times a SYN cookie was
	// sent.
	ListenOverflowSynCookieSent *StatCounter

	// ListenOverflowSynCookieRcvd is the number of times a valid SYN
	// cookie was received.
	ListenOverflowSynCookieRcvd *StatCounter

	// ListenOverflowInvalidSynCookieRcvd is the number of times an invalid SYN cookie
	// was received.
	ListenOverflowInvalidSynCookieRcvd *StatCounter

	// FailedConnectionAttempts is the number of calls to Connect or Listen
	// (active and passive openings, respectively) that end in an error.
	FailedConnectionAttempts *StatCounter

	// ValidSegmentsReceived is the number of TCP segments received that
	// the transport layer successfully parsed.
	ValidSegmentsReceived *StatCounter

	// InvalidSegmentsReceived is the number of TCP segments received that
	// the transport layer could not parse.
	InvalidSegmentsReceived *StatCounter

	// SegmentsSent is the number of TCP segments sent.
	SegmentsSent *StatCounter

	// ResetsSent is the number of TCP resets sent.
	ResetsSent *StatCounter

	// ResetsReceived is the number of TCP resets received.
	ResetsReceived *StatCounter

	// Retransmits is the number of TCP segments retransmitted.
	Retransmits *StatCounter

	// FastRecovery is the number of times Fast Recovery was used to
	// recover from packet loss.
	FastRecovery *StatCounter

	// SACKRecovery is the number of times SACK Recovery was used to
	// recover from packet loss.
	SACKRecovery *StatCounter

	// SlowStartRetransmits is the number of segments retransmitted in slow
	// start.
	SlowStartRetransmits *StatCounter

	// FastRetransmit is the number of segments retransmitted in fast
	// recovery.
	FastRetransmit *StatCounter

	// Timeouts is the number of times the RTO expired.
	Timeouts *StatCounter

	// ChecksumErrors is the number of segments dropped due to bad checksums.
	ChecksumErrors *StatCounter
}

// UDPStats collects UDP-specific stats.
type UDPStats struct {
	// PacketsReceived is the number of UDP datagrams received via
	// HandlePacket.
	PacketsReceived *StatCounter

	// UnknownPortErrors is the number of incoming UDP datagrams dropped
	// because they did not have a known destination port.
	UnknownPortErrors *StatCounter

	// ReceiveBufferErrors is the number of incoming UDP datagrams dropped
	// due to the receiving buffer being in an invalid state.
	ReceiveBufferErrors *StatCounter

	// MalformedPacketsReceived is the number of incoming UDP datagrams
	// dropped due to the UDP header being in a malformed state.
	MalformedPacketsReceived *StatCounter

	// PacketsSent is the number of UDP datagrams sent via sendUDP.
	PacketsSent *StatCounter
}

// Stats holds statistics about the networking stack.
//
// All fields are optional; FillIn returns a copy in which every nil counter
// has been replaced by a newly allocated StatCounter.
type Stats struct {
	// UnknownProtocolRcvdPackets is the number of packets received by the
	// stack that were for an unknown or unsupported protocol.
	UnknownProtocolRcvdPackets *StatCounter

	// MalformedRcvdPackets is the number of packets received by the stack
	// that were deemed malformed.
	MalformedRcvdPackets *StatCounter

	// DroppedPackets is the number of packets dropped due to full queues.
	DroppedPackets *StatCounter

	// ICMP breaks out ICMP-specific stats (both v4 and v6).
952 ICMP ICMPStats 953 954 // IP breaks out IP-specific stats (both v4 and v6). 955 IP IPStats 956 957 // TCP breaks out TCP-specific stats. 958 TCP TCPStats 959 960 // UDP breaks out UDP-specific stats. 961 UDP UDPStats 962 } 963 964 func fillIn(v reflect.Value) { 965 for i := 0; i < v.NumField(); i++ { 966 v := v.Field(i) 967 switch v.Kind() { 968 case reflect.Ptr: 969 if s := v.Addr().Interface().(**StatCounter); *s == nil { 970 *s = &StatCounter{} 971 } 972 case reflect.Struct: 973 fillIn(v) 974 default: 975 panic(fmt.Sprintf("unexpected type %s", v.Type())) 976 } 977 } 978 } 979 980 // FillIn returns a copy of s with nil fields initialized to new StatCounters. 981 func (s Stats) FillIn() Stats { 982 fillIn(reflect.ValueOf(&s).Elem()) 983 return s 984 } 985 986 // String implements the fmt.Stringer interface. 987 func (a Address) String() string { 988 switch len(a) { 989 case 4: 990 return fmt.Sprintf("%d.%d.%d.%d", int(a[0]), int(a[1]), int(a[2]), int(a[3])) 991 case 16: 992 // Find the longest subsequence of hexadecimal zeros. 993 start, end := -1, -1 994 for i := 0; i < len(a); i += 2 { 995 j := i 996 for j < len(a) && a[j] == 0 && a[j+1] == 0 { 997 j += 2 998 } 999 if j > i+2 && j-i > end-start { 1000 start, end = i, j 1001 } 1002 } 1003 1004 var b strings.Builder 1005 for i := 0; i < len(a); i += 2 { 1006 if i == start { 1007 b.WriteString("::") 1008 i = end 1009 if end >= len(a) { 1010 break 1011 } 1012 } else if i > 0 { 1013 b.WriteByte(':') 1014 } 1015 v := uint16(a[i+0])<<8 | uint16(a[i+1]) 1016 if v == 0 { 1017 b.WriteByte('0') 1018 } else { 1019 const digits = "0123456789abcdef" 1020 for i := uint(3); i < 4; i-- { 1021 if v := v >> (i * 4); v != 0 { 1022 b.WriteByte(digits[v&0xf]) 1023 } 1024 } 1025 } 1026 } 1027 return b.String() 1028 default: 1029 return fmt.Sprintf("%x", []byte(a)) 1030 } 1031 } 1032 1033 // To4 converts the IPv4 address to a 4-byte representation. 1034 // If the address is not an IPv4 address, To4 returns "". 
1035 func (a Address) To4() Address { 1036 const ( 1037 ipv4len = 4 1038 ipv6len = 16 1039 ) 1040 if len(a) == ipv4len { 1041 return a 1042 } 1043 if len(a) == ipv6len && 1044 isZeros(a[0:10]) && 1045 a[10] == 0xff && 1046 a[11] == 0xff { 1047 return a[12:16] 1048 } 1049 return "" 1050 } 1051 1052 // isZeros reports whether a is all zeros. 1053 func isZeros(a Address) bool { 1054 for i := 0; i < len(a); i++ { 1055 if a[i] != 0 { 1056 return false 1057 } 1058 } 1059 return true 1060 } 1061 1062 // LinkAddress is a byte slice cast as a string that represents a link address. 1063 // It is typically a 6-byte MAC address. 1064 type LinkAddress string 1065 1066 // String implements the fmt.Stringer interface. 1067 func (a LinkAddress) String() string { 1068 switch len(a) { 1069 case 6: 1070 return fmt.Sprintf("%02x:%02x:%02x:%02x:%02x:%02x", a[0], a[1], a[2], a[3], a[4], a[5]) 1071 default: 1072 return fmt.Sprintf("%x", []byte(a)) 1073 } 1074 } 1075 1076 // ParseMACAddress parses an IEEE 802 address. 1077 // 1078 // It must be in the format aa:bb:cc:dd:ee:ff or aa-bb-cc-dd-ee-ff. 1079 func ParseMACAddress(s string) (LinkAddress, error) { 1080 parts := strings.FieldsFunc(s, func(c rune) bool { 1081 return c == ':' || c == '-' 1082 }) 1083 if len(parts) != 6 { 1084 return "", fmt.Errorf("inconsistent parts: %s", s) 1085 } 1086 addr := make([]byte, 0, len(parts)) 1087 for _, part := range parts { 1088 u, err := strconv.ParseUint(part, 16, 8) 1089 if err != nil { 1090 return "", fmt.Errorf("invalid hex digits: %s", s) 1091 } 1092 addr = append(addr, byte(u)) 1093 } 1094 return LinkAddress(addr), nil 1095 } 1096 1097 // AddressWithPrefix is an address with its subnet prefix length. 1098 type AddressWithPrefix struct { 1099 // Address is a network address. 1100 Address Address 1101 1102 // PrefixLen is the subnet prefix length. 1103 PrefixLen int 1104 } 1105 1106 // String implements the fmt.Stringer interface. 
1107 func (a AddressWithPrefix) String() string { 1108 return fmt.Sprintf("%s/%d", a.Address, a.PrefixLen) 1109 } 1110 1111 // Subnet converts the address and prefix into a Subnet value and returns it. 1112 func (a AddressWithPrefix) Subnet() Subnet { 1113 addrLen := len(a.Address) 1114 if a.PrefixLen <= 0 { 1115 return Subnet{ 1116 address: Address(strings.Repeat("\x00", addrLen)), 1117 mask: AddressMask(strings.Repeat("\x00", addrLen)), 1118 } 1119 } 1120 if a.PrefixLen >= addrLen*8 { 1121 return Subnet{ 1122 address: a.Address, 1123 mask: AddressMask(strings.Repeat("\xff", addrLen)), 1124 } 1125 } 1126 1127 sa := make([]byte, addrLen) 1128 sm := make([]byte, addrLen) 1129 n := uint(a.PrefixLen) 1130 for i := 0; i < addrLen; i++ { 1131 if n >= 8 { 1132 sa[i] = a.Address[i] 1133 sm[i] = 0xff 1134 n -= 8 1135 continue 1136 } 1137 sm[i] = ^byte(0xff >> n) 1138 sa[i] = a.Address[i] & sm[i] 1139 n = 0 1140 } 1141 1142 // For extra caution, call NewSubnet rather than directly creating the Subnet 1143 // value. If that fails it indicates a serious bug in this code, so panic is 1144 // in order. 1145 s, err := NewSubnet(Address(sa), AddressMask(sm)) 1146 if err != nil { 1147 panic("invalid subnet: " + err.Error()) 1148 } 1149 return s 1150 } 1151 1152 // ProtocolAddress is an address and the network protocol it is associated 1153 // with. 1154 type ProtocolAddress struct { 1155 // Protocol is the protocol of the address. 1156 Protocol NetworkProtocolNumber 1157 1158 // AddressWithPrefix is a network address with its subnet prefix length. 1159 AddressWithPrefix AddressWithPrefix 1160 } 1161 1162 var ( 1163 // danglingEndpointsMu protects access to danglingEndpoints. 1164 danglingEndpointsMu sync.Mutex 1165 1166 // danglingEndpoints tracks all dangling endpoints no longer owned by the app. 1167 danglingEndpoints = make(map[Endpoint]struct{}) 1168 ) 1169 1170 // GetDanglingEndpoints returns all dangling endpoints. 
1171 func GetDanglingEndpoints() []Endpoint { 1172 es := make([]Endpoint, 0, len(danglingEndpoints)) 1173 danglingEndpointsMu.Lock() 1174 for e := range danglingEndpoints { 1175 es = append(es, e) 1176 } 1177 danglingEndpointsMu.Unlock() 1178 return es 1179 } 1180 1181 // AddDanglingEndpoint adds a dangling endpoint. 1182 func AddDanglingEndpoint(e Endpoint) { 1183 danglingEndpointsMu.Lock() 1184 danglingEndpoints[e] = struct{}{} 1185 danglingEndpointsMu.Unlock() 1186 } 1187 1188 // DeleteDanglingEndpoint removes a dangling endpoint. 1189 func DeleteDanglingEndpoint(e Endpoint) { 1190 danglingEndpointsMu.Lock() 1191 delete(danglingEndpoints, e) 1192 danglingEndpointsMu.Unlock() 1193 } 1194 1195 // AsyncLoading is the global barrier for asynchronous endpoint loading 1196 // activities. 1197 var AsyncLoading sync.WaitGroup