github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/socket/netstack/netstack.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package netstack provides an implementation of the socket.Socket interface 16 // that is backed by a tcpip.Endpoint. 17 // 18 // It does not depend on any particular endpoint implementation, and thus can 19 // be used to expose certain endpoints to the sentry while leaving others out, 20 // for example, TCP endpoints and Unix-domain endpoints. 21 // 22 // Lock ordering: netstack => mm: ioSequenceReadWriter copies user memory inside 23 // tcpip.Endpoint.Write(). Netstack is allowed to (and does) hold locks during 24 // this operation. 
25 package netstack 26 27 import ( 28 "bytes" 29 "encoding/binary" 30 "fmt" 31 "io" 32 "io/ioutil" 33 "math" 34 "reflect" 35 "time" 36 37 "golang.org/x/sys/unix" 38 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux" 39 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux/errno" 40 "github.com/nicocha30/gvisor-ligolo/pkg/context" 41 "github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr" 42 "github.com/nicocha30/gvisor-ligolo/pkg/hostarch" 43 "github.com/nicocha30/gvisor-ligolo/pkg/log" 44 "github.com/nicocha30/gvisor-ligolo/pkg/marshal" 45 "github.com/nicocha30/gvisor-ligolo/pkg/marshal/primitive" 46 "github.com/nicocha30/gvisor-ligolo/pkg/metric" 47 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/arch" 48 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/sockfs" 49 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/inet" 50 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel" 51 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth" 52 ktime "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/time" 53 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/socket" 54 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/socket/netfilter" 55 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs" 56 "github.com/nicocha30/gvisor-ligolo/pkg/sync" 57 "github.com/nicocha30/gvisor-ligolo/pkg/syserr" 58 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip" 59 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/header" 60 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/stack" 61 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/transport" 62 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/transport/tcp" 63 "github.com/nicocha30/gvisor-ligolo/pkg/usermem" 64 "github.com/nicocha30/gvisor-ligolo/pkg/waiter" 65 ) 66 67 const bitsPerUint32 = 32 68 69 // statCounterValue returns a function usable as callback function when defining a gVisor Sentry 70 // metric that contains the value counted by the StatCounter. 71 // This avoids a dependency loop in the tcpip package. 
72 func statCounterValue(cm *tcpip.StatCounter) func(...*metric.FieldValue) uint64 { 73 return func(...*metric.FieldValue) uint64 { 74 return cm.Value() 75 } 76 } 77 78 func mustCreateMetric(name, description string) *tcpip.StatCounter { 79 var cm tcpip.StatCounter 80 metric.MustRegisterCustomUint64Metric(name, true /* cumulative */, false /* sync */, description, statCounterValue(&cm)) 81 return &cm 82 } 83 84 func mustCreateGauge(name, description string) *tcpip.StatCounter { 85 var cm tcpip.StatCounter 86 metric.MustRegisterCustomUint64Metric(name, false /* cumulative */, false /* sync */, description, statCounterValue(&cm)) 87 return &cm 88 } 89 90 // Metrics contains metrics exported by netstack. 91 var Metrics = tcpip.Stats{ 92 DroppedPackets: mustCreateMetric("/netstack/dropped_packets", "Number of packets dropped at the transport layer."), 93 NICs: tcpip.NICStats{ 94 MalformedL4RcvdPackets: mustCreateMetric("/netstack/nic/malformed_l4_received_packets", "Number of packets received that failed L4 header parsing."), 95 Tx: tcpip.NICPacketStats{ 96 Packets: mustCreateMetric("/netstack/nic/tx/packets", "Number of packets transmitted."), 97 Bytes: mustCreateMetric("/netstack/nic/tx/bytes", "Number of bytes transmitted."), 98 }, 99 TxPacketsDroppedNoBufferSpace: mustCreateMetric("/netstack/nic/tx_packets_dropped_no_buffer_space", "Number of TX packets dropped as a result of no buffer space errors."), 100 Rx: tcpip.NICPacketStats{ 101 Packets: mustCreateMetric("/netstack/nic/rx/packets", "Number of packets received."), 102 Bytes: mustCreateMetric("/netstack/nic/rx/bytes", "Number of bytes received."), 103 }, 104 DisabledRx: tcpip.NICPacketStats{ 105 Packets: mustCreateMetric("/netstack/nic/disabled_rx/packets", "Number of packets received on disabled NICs."), 106 Bytes: mustCreateMetric("/netstack/nic/disabled_rx/bytes", "Number of bytes received on disabled NICs."), 107 }, 108 Neighbor: tcpip.NICNeighborStats{ 109 UnreachableEntryLookups: 
mustCreateMetric("/netstack/nic/neighbor/unreachable_entry_loopups", "Number of lookups performed on a neighbor entry in Unreachable state."), 110 DroppedConfirmationForNoninitiatedNeighbor: mustCreateMetric("/netstack/nic/neighbor/dropped_confirmation_for_noninitiated_neighbor", "Number of advertisements received that don't match an entry in the neighbor cache."), 111 DroppedInvalidLinkAddressConfirmations: mustCreateMetric("/netstack/nic/neighbor/dropped_invalid_link_address_confirmations", "Number of advertisements dropped because they have empty source link-layer addresses"), 112 }, 113 }, 114 ICMP: tcpip.ICMPStats{ 115 V4: tcpip.ICMPv4Stats{ 116 PacketsSent: tcpip.ICMPv4SentPacketStats{ 117 ICMPv4PacketStats: tcpip.ICMPv4PacketStats{ 118 EchoRequest: mustCreateMetric("/netstack/icmp/v4/packets_sent/echo_request", "Number of ICMPv4 echo request packets sent."), 119 EchoReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/echo_reply", "Number of ICMPv4 echo reply packets sent."), 120 DstUnreachable: mustCreateMetric("/netstack/icmp/v4/packets_sent/dst_unreachable", "Number of ICMPv4 destination unreachable packets sent."), 121 SrcQuench: mustCreateMetric("/netstack/icmp/v4/packets_sent/src_quench", "Number of ICMPv4 source quench packets sent."), 122 Redirect: mustCreateMetric("/netstack/icmp/v4/packets_sent/redirect", "Number of ICMPv4 redirect packets sent."), 123 TimeExceeded: mustCreateMetric("/netstack/icmp/v4/packets_sent/time_exceeded", "Number of ICMPv4 time exceeded packets sent."), 124 ParamProblem: mustCreateMetric("/netstack/icmp/v4/packets_sent/param_problem", "Number of ICMPv4 parameter problem packets sent."), 125 Timestamp: mustCreateMetric("/netstack/icmp/v4/packets_sent/timestamp", "Number of ICMPv4 timestamp packets sent."), 126 TimestampReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/timestamp_reply", "Number of ICMPv4 timestamp reply packets sent."), 127 InfoRequest: mustCreateMetric("/netstack/icmp/v4/packets_sent/info_request", 
"Number of ICMPv4 information request packets sent."), 128 InfoReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/info_reply", "Number of ICMPv4 information reply packets sent."), 129 }, 130 Dropped: mustCreateMetric("/netstack/icmp/v4/packets_sent/dropped", "Number of ICMPv4 packets dropped due to link layer errors."), 131 RateLimited: mustCreateMetric("/netstack/icmp/v4/packets_sent/rate_limited", "Number of ICMPv4 packets dropped due to rate limit being exceeded."), 132 }, 133 PacketsReceived: tcpip.ICMPv4ReceivedPacketStats{ 134 ICMPv4PacketStats: tcpip.ICMPv4PacketStats{ 135 EchoRequest: mustCreateMetric("/netstack/icmp/v4/packets_received/echo_request", "Number of ICMPv4 echo request packets received."), 136 EchoReply: mustCreateMetric("/netstack/icmp/v4/packets_received/echo_reply", "Number of ICMPv4 echo reply packets received."), 137 DstUnreachable: mustCreateMetric("/netstack/icmp/v4/packets_received/dst_unreachable", "Number of ICMPv4 destination unreachable packets received."), 138 SrcQuench: mustCreateMetric("/netstack/icmp/v4/packets_received/src_quench", "Number of ICMPv4 source quench packets received."), 139 Redirect: mustCreateMetric("/netstack/icmp/v4/packets_received/redirect", "Number of ICMPv4 redirect packets received."), 140 TimeExceeded: mustCreateMetric("/netstack/icmp/v4/packets_received/time_exceeded", "Number of ICMPv4 time exceeded packets received."), 141 ParamProblem: mustCreateMetric("/netstack/icmp/v4/packets_received/param_problem", "Number of ICMPv4 parameter problem packets received."), 142 Timestamp: mustCreateMetric("/netstack/icmp/v4/packets_received/timestamp", "Number of ICMPv4 timestamp packets received."), 143 TimestampReply: mustCreateMetric("/netstack/icmp/v4/packets_received/timestamp_reply", "Number of ICMPv4 timestamp reply packets received."), 144 InfoRequest: mustCreateMetric("/netstack/icmp/v4/packets_received/info_request", "Number of ICMPv4 information request packets received."), 145 InfoReply: 
mustCreateMetric("/netstack/icmp/v4/packets_received/info_reply", "Number of ICMPv4 information reply packets received."), 146 }, 147 Invalid: mustCreateMetric("/netstack/icmp/v4/packets_received/invalid", "Number of ICMPv4 packets received that the transport layer could not parse."), 148 }, 149 }, 150 V6: tcpip.ICMPv6Stats{ 151 PacketsSent: tcpip.ICMPv6SentPacketStats{ 152 ICMPv6PacketStats: tcpip.ICMPv6PacketStats{ 153 EchoRequest: mustCreateMetric("/netstack/icmp/v6/packets_sent/echo_request", "Number of ICMPv6 echo request packets sent."), 154 EchoReply: mustCreateMetric("/netstack/icmp/v6/packets_sent/echo_reply", "Number of ICMPv6 echo reply packets sent."), 155 DstUnreachable: mustCreateMetric("/netstack/icmp/v6/packets_sent/dst_unreachable", "Number of ICMPv6 destination unreachable packets sent."), 156 PacketTooBig: mustCreateMetric("/netstack/icmp/v6/packets_sent/packet_too_big", "Number of ICMPv6 packet too big packets sent."), 157 TimeExceeded: mustCreateMetric("/netstack/icmp/v6/packets_sent/time_exceeded", "Number of ICMPv6 time exceeded packets sent."), 158 ParamProblem: mustCreateMetric("/netstack/icmp/v6/packets_sent/param_problem", "Number of ICMPv6 parameter problem packets sent."), 159 RouterSolicit: mustCreateMetric("/netstack/icmp/v6/packets_sent/router_solicit", "Number of ICMPv6 router solicit packets sent."), 160 RouterAdvert: mustCreateMetric("/netstack/icmp/v6/packets_sent/router_advert", "Number of ICMPv6 router advert packets sent."), 161 NeighborSolicit: mustCreateMetric("/netstack/icmp/v6/packets_sent/neighbor_solicit", "Number of ICMPv6 neighbor solicit packets sent."), 162 NeighborAdvert: mustCreateMetric("/netstack/icmp/v6/packets_sent/neighbor_advert", "Number of ICMPv6 neighbor advert packets sent."), 163 RedirectMsg: mustCreateMetric("/netstack/icmp/v6/packets_sent/redirect_msg", "Number of ICMPv6 redirect message packets sent."), 164 MulticastListenerQuery: 
mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_query", "Number of ICMPv6 multicast listener query packets sent."), 165 MulticastListenerReport: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_report", "Number of ICMPv6 multicast listener report packets sent."), 166 MulticastListenerDone: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_done", "Number of ICMPv6 multicast listener done packets sent."), 167 }, 168 Dropped: mustCreateMetric("/netstack/icmp/v6/packets_sent/dropped", "Number of ICMPv6 packets dropped due to link layer errors."), 169 RateLimited: mustCreateMetric("/netstack/icmp/v6/packets_sent/rate_limited", "Number of ICMPv6 packets dropped due to rate limit being exceeded."), 170 }, 171 PacketsReceived: tcpip.ICMPv6ReceivedPacketStats{ 172 ICMPv6PacketStats: tcpip.ICMPv6PacketStats{ 173 EchoRequest: mustCreateMetric("/netstack/icmp/v6/packets_received/echo_request", "Number of ICMPv6 echo request packets received."), 174 EchoReply: mustCreateMetric("/netstack/icmp/v6/packets_received/echo_reply", "Number of ICMPv6 echo reply packets received."), 175 DstUnreachable: mustCreateMetric("/netstack/icmp/v6/packets_received/dst_unreachable", "Number of ICMPv6 destination unreachable packets received."), 176 PacketTooBig: mustCreateMetric("/netstack/icmp/v6/packets_received/packet_too_big", "Number of ICMPv6 packet too big packets received."), 177 TimeExceeded: mustCreateMetric("/netstack/icmp/v6/packets_received/time_exceeded", "Number of ICMPv6 time exceeded packets received."), 178 ParamProblem: mustCreateMetric("/netstack/icmp/v6/packets_received/param_problem", "Number of ICMPv6 parameter problem packets received."), 179 RouterSolicit: mustCreateMetric("/netstack/icmp/v6/packets_received/router_solicit", "Number of ICMPv6 router solicit packets received."), 180 RouterAdvert: mustCreateMetric("/netstack/icmp/v6/packets_received/router_advert", "Number of ICMPv6 router advert packets received."), 
181 NeighborSolicit: mustCreateMetric("/netstack/icmp/v6/packets_received/neighbor_solicit", "Number of ICMPv6 neighbor solicit packets received."), 182 NeighborAdvert: mustCreateMetric("/netstack/icmp/v6/packets_received/neighbor_advert", "Number of ICMPv6 neighbor advert packets received."), 183 RedirectMsg: mustCreateMetric("/netstack/icmp/v6/packets_received/redirect_msg", "Number of ICMPv6 redirect message packets received."), 184 MulticastListenerQuery: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_query", "Number of ICMPv6 multicast listener query packets received."), 185 MulticastListenerReport: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_report", "Number of ICMPv6 multicast listener report packets sent."), 186 MulticastListenerDone: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_done", "Number of ICMPv6 multicast listener done packets sent."), 187 }, 188 Unrecognized: mustCreateMetric("/netstack/icmp/v6/packets_received/unrecognized", "Number of ICMPv6 packets received that the transport layer does not know how to parse."), 189 Invalid: mustCreateMetric("/netstack/icmp/v6/packets_received/invalid", "Number of ICMPv6 packets received that the transport layer could not parse."), 190 RouterOnlyPacketsDroppedByHost: mustCreateMetric("/netstack/icmp/v6/packets_received/router_only_packets_dropped_by_host", "Number of ICMPv6 packets dropped due to being router-specific packets."), 191 }, 192 }, 193 }, 194 IGMP: tcpip.IGMPStats{ 195 PacketsSent: tcpip.IGMPSentPacketStats{ 196 IGMPPacketStats: tcpip.IGMPPacketStats{ 197 MembershipQuery: mustCreateMetric("/netstack/igmp/packets_sent/membership_query", "Number of IGMP Membership Query messages sent."), 198 V1MembershipReport: mustCreateMetric("/netstack/igmp/packets_sent/v1_membership_report", "Number of IGMPv1 Membership Report messages sent."), 199 V2MembershipReport: mustCreateMetric("/netstack/igmp/packets_sent/v2_membership_report", 
"Number of IGMPv2 Membership Report messages sent."), 200 LeaveGroup: mustCreateMetric("/netstack/igmp/packets_sent/leave_group", "Number of IGMP Leave Group messages sent."), 201 }, 202 Dropped: mustCreateMetric("/netstack/igmp/packets_sent/dropped", "Number of IGMP packets dropped due to link layer errors."), 203 }, 204 PacketsReceived: tcpip.IGMPReceivedPacketStats{ 205 IGMPPacketStats: tcpip.IGMPPacketStats{ 206 MembershipQuery: mustCreateMetric("/netstack/igmp/packets_received/membership_query", "Number of IGMP Membership Query messages received."), 207 V1MembershipReport: mustCreateMetric("/netstack/igmp/packets_received/v1_membership_report", "Number of IGMPv1 Membership Report messages received."), 208 V2MembershipReport: mustCreateMetric("/netstack/igmp/packets_received/v2_membership_report", "Number of IGMPv2 Membership Report messages received."), 209 LeaveGroup: mustCreateMetric("/netstack/igmp/packets_received/leave_group", "Number of IGMP Leave Group messages received."), 210 }, 211 Invalid: mustCreateMetric("/netstack/igmp/packets_received/invalid", "Number of IGMP packets received that could not be parsed."), 212 ChecksumErrors: mustCreateMetric("/netstack/igmp/packets_received/checksum_errors", "Number of received IGMP packets with bad checksums."), 213 Unrecognized: mustCreateMetric("/netstack/igmp/packets_received/unrecognized", "Number of unrecognized IGMP packets received."), 214 }, 215 }, 216 IP: tcpip.IPStats{ 217 PacketsReceived: mustCreateMetric("/netstack/ip/packets_received", "Number of IP packets received from the link layer in nic.DeliverNetworkPacket."), 218 DisabledPacketsReceived: mustCreateMetric("/netstack/ip/disabled_packets_received", "Number of IP packets received from the link layer when the IP layer is disabled."), 219 InvalidDestinationAddressesReceived: mustCreateMetric("/netstack/ip/invalid_addresses_received", "Number of IP packets received with an unknown or invalid destination address."), 220 
InvalidSourceAddressesReceived: mustCreateMetric("/netstack/ip/invalid_source_addresses_received", "Number of IP packets received with an unknown or invalid source address."), 221 PacketsDelivered: mustCreateMetric("/netstack/ip/packets_delivered", "Number of incoming IP packets that are successfully delivered to the transport layer via HandlePacket."), 222 PacketsSent: mustCreateMetric("/netstack/ip/packets_sent", "Number of IP packets sent via WritePacket."), 223 OutgoingPacketErrors: mustCreateMetric("/netstack/ip/outgoing_packet_errors", "Number of IP packets which failed to write to a link-layer endpoint."), 224 MalformedPacketsReceived: mustCreateMetric("/netstack/ip/malformed_packets_received", "Number of IP packets which failed IP header validation checks."), 225 MalformedFragmentsReceived: mustCreateMetric("/netstack/ip/malformed_fragments_received", "Number of IP fragments which failed IP fragment validation checks."), 226 IPTablesPreroutingDropped: mustCreateMetric("/netstack/ip/iptables/prerouting_dropped", "Number of IP packets dropped in the Prerouting chain."), 227 IPTablesInputDropped: mustCreateMetric("/netstack/ip/iptables/input_dropped", "Number of IP packets dropped in the Input chain."), 228 IPTablesOutputDropped: mustCreateMetric("/netstack/ip/iptables/output_dropped", "Number of IP packets dropped in the Output chain."), 229 OptionTimestampReceived: mustCreateMetric("/netstack/ip/options/timestamp_received", "Number of timestamp options found in received IP packets."), 230 OptionRecordRouteReceived: mustCreateMetric("/netstack/ip/options/record_route_received", "Number of record route options found in received IP packets."), 231 OptionRouterAlertReceived: mustCreateMetric("/netstack/ip/options/router_alert_received", "Number of router alert options found in received IP packets."), 232 OptionUnknownReceived: mustCreateMetric("/netstack/ip/options/unknown_received", "Number of unknown options found in received IP packets."), 233 Forwarding: 
tcpip.IPForwardingStats{ 234 Unrouteable: mustCreateMetric("/netstack/ip/forwarding/unrouteable", "Number of IP packets received which couldn't be routed and thus were not forwarded."), 235 ExhaustedTTL: mustCreateMetric("/netstack/ip/forwarding/exhausted_ttl", "Number of IP packets received which could not be forwarded due to an exhausted TTL."), 236 LinkLocalSource: mustCreateMetric("/netstack/ip/forwarding/link_local_source_address", "Number of IP packets received which could not be forwarded due to a link-local source address."), 237 LinkLocalDestination: mustCreateMetric("/netstack/ip/forwarding/link_local_destination_address", "Number of IP packets received which could not be forwarded due to a link-local destination address."), 238 ExtensionHeaderProblem: mustCreateMetric("/netstack/ip/forwarding/extension_header_problem", "Number of IP packets received which could not be forwarded due to a problem processing their IPv6 extension headers."), 239 PacketTooBig: mustCreateMetric("/netstack/ip/forwarding/packet_too_big", "Number of IP packets received which could not be forwarded because they could not fit within the outgoing MTU."), 240 HostUnreachable: mustCreateMetric("/netstack/ip/forwarding/host_unreachable", "Number of IP packets received which could not be forwarded due to unresolvable next hop."), 241 Errors: mustCreateMetric("/netstack/ip/forwarding/errors", "Number of IP packets which couldn't be forwarded."), 242 }, 243 }, 244 ARP: tcpip.ARPStats{ 245 PacketsReceived: mustCreateMetric("/netstack/arp/packets_received", "Number of ARP packets received from the link layer."), 246 DisabledPacketsReceived: mustCreateMetric("/netstack/arp/disabled_packets_received", "Number of ARP packets received from the link layer when the ARP layer is disabled."), 247 MalformedPacketsReceived: mustCreateMetric("/netstack/arp/malformed_packets_received", "Number of ARP packets which failed ARP header validation checks."), 248 RequestsReceived: 
mustCreateMetric("/netstack/arp/requests_received", "Number of ARP requests received."), 249 RequestsReceivedUnknownTargetAddress: mustCreateMetric("/netstack/arp/requests_received_unknown_addr", "Number of ARP requests received with an unknown target address."), 250 OutgoingRequestInterfaceHasNoLocalAddressErrors: mustCreateMetric("/netstack/arp/outgoing_requests_iface_has_no_addr", "Number of failed attempts to send an ARP request with an interface that has no network address."), 251 OutgoingRequestBadLocalAddressErrors: mustCreateMetric("/netstack/arp/outgoing_requests_invalid_local_addr", "Number of failed attempts to send an ARP request with a provided local address that is invalid."), 252 OutgoingRequestsDropped: mustCreateMetric("/netstack/arp/outgoing_requests_dropped", "Number of ARP requests which failed to write to a link-layer endpoint."), 253 OutgoingRequestsSent: mustCreateMetric("/netstack/arp/outgoing_requests_sent", "Number of ARP requests sent."), 254 RepliesReceived: mustCreateMetric("/netstack/arp/replies_received", "Number of ARP replies received."), 255 OutgoingRepliesDropped: mustCreateMetric("/netstack/arp/outgoing_replies_dropped", "Number of ARP replies which failed to write to a link-layer endpoint."), 256 OutgoingRepliesSent: mustCreateMetric("/netstack/arp/outgoing_replies_sent", "Number of ARP replies sent."), 257 }, 258 TCP: tcpip.TCPStats{ 259 ActiveConnectionOpenings: mustCreateMetric("/netstack/tcp/active_connection_openings", "Number of connections opened successfully via Connect."), 260 PassiveConnectionOpenings: mustCreateMetric("/netstack/tcp/passive_connection_openings", "Number of connections opened successfully via Listen."), 261 CurrentEstablished: mustCreateGauge("/netstack/tcp/current_established", "Number of connections in ESTABLISHED state now."), 262 CurrentConnected: mustCreateGauge("/netstack/tcp/current_open", "Number of connections that are in connected state."), 263 EstablishedResets: 
mustCreateMetric("/netstack/tcp/established_resets", "Number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state"), 264 EstablishedClosed: mustCreateMetric("/netstack/tcp/established_closed", "Number of times established TCP connections made a transition to CLOSED state."), 265 EstablishedTimedout: mustCreateMetric("/netstack/tcp/established_timedout", "Number of times an established connection was reset because of keep-alive time out."), 266 ListenOverflowSynDrop: mustCreateMetric("/netstack/tcp/listen_overflow_syn_drop", "Number of times the listen queue overflowed and a SYN was dropped."), 267 ListenOverflowAckDrop: mustCreateMetric("/netstack/tcp/listen_overflow_ack_drop", "Number of times the listen queue overflowed and the final ACK in the handshake was dropped."), 268 ListenOverflowSynCookieSent: mustCreateMetric("/netstack/tcp/listen_overflow_syn_cookie_sent", "Number of times a SYN cookie was sent."), 269 ListenOverflowSynCookieRcvd: mustCreateMetric("/netstack/tcp/listen_overflow_syn_cookie_rcvd", "Number of times a SYN cookie was received."), 270 ListenOverflowInvalidSynCookieRcvd: mustCreateMetric("/netstack/tcp/listen_overflow_invalid_syn_cookie_rcvd", "Number of times an invalid SYN cookie was received."), 271 FailedConnectionAttempts: mustCreateMetric("/netstack/tcp/failed_connection_attempts", "Number of calls to Connect or Listen (active and passive openings, respectively) that end in an error."), 272 ValidSegmentsReceived: mustCreateMetric("/netstack/tcp/valid_segments_received", "Number of TCP segments received that the transport layer successfully parsed."), 273 InvalidSegmentsReceived: mustCreateMetric("/netstack/tcp/invalid_segments_received", "Number of TCP segments received that the transport layer could not parse."), 274 SegmentsSent: mustCreateMetric("/netstack/tcp/segments_sent", "Number of TCP segments sent."), 275 SegmentSendErrors: 
mustCreateMetric("/netstack/tcp/segment_send_errors", "Number of TCP segments failed to be sent."), 276 ResetsSent: mustCreateMetric("/netstack/tcp/resets_sent", "Number of TCP resets sent."), 277 ResetsReceived: mustCreateMetric("/netstack/tcp/resets_received", "Number of TCP resets received."), 278 Retransmits: mustCreateMetric("/netstack/tcp/retransmits", "Number of TCP segments retransmitted."), 279 FastRecovery: mustCreateMetric("/netstack/tcp/fast_recovery", "Number of times fast recovery was used to recover from packet loss."), 280 SACKRecovery: mustCreateMetric("/netstack/tcp/sack_recovery", "Number of times SACK recovery was used to recover from packet loss."), 281 TLPRecovery: mustCreateMetric("/netstack/tcp/tlp_recovery", "Number of times tail loss probe triggers recovery from tail loss."), 282 SlowStartRetransmits: mustCreateMetric("/netstack/tcp/slow_start_retransmits", "Number of segments retransmitted in slow start mode."), 283 FastRetransmit: mustCreateMetric("/netstack/tcp/fast_retransmit", "Number of TCP segments which were fast retransmitted."), 284 Timeouts: mustCreateMetric("/netstack/tcp/timeouts", "Number of times RTO expired."), 285 ChecksumErrors: mustCreateMetric("/netstack/tcp/checksum_errors", "Number of segments dropped due to bad checksums."), 286 FailedPortReservations: mustCreateMetric("/netstack/tcp/failed_port_reservations", "Number of time TCP failed to reserve a port."), 287 SegmentsAckedWithDSACK: mustCreateMetric("/netstack/tcp/segments_acked_with_dsack", "Number of segments for which DSACK was received."), 288 SpuriousRecovery: mustCreateMetric("/netstack/tcp/spurious_recovery", "Number of times the connection entered loss recovery spuriously."), 289 SpuriousRTORecovery: mustCreateMetric("/netstack/tcp/spurious_rto_recovery", "Number of times the connection entered RTO spuriously."), 290 ForwardMaxInFlightDrop: mustCreateMetric("/netstack/tcp/forward_max_in_flight_drop", "Number of connection requests dropped due to exceeding 
in-flight limit."), 291 }, 292 UDP: tcpip.UDPStats{ 293 PacketsReceived: mustCreateMetric("/netstack/udp/packets_received", "Number of UDP datagrams received via HandlePacket."), 294 UnknownPortErrors: mustCreateMetric("/netstack/udp/unknown_port_errors", "Number of incoming UDP datagrams dropped because they did not have a known destination port."), 295 ReceiveBufferErrors: mustCreateMetric("/netstack/udp/receive_buffer_errors", "Number of incoming UDP datagrams dropped due to the receiving buffer being in an invalid state."), 296 MalformedPacketsReceived: mustCreateMetric("/netstack/udp/malformed_packets_received", "Number of incoming UDP datagrams dropped due to the UDP header being in a malformed state."), 297 PacketsSent: mustCreateMetric("/netstack/udp/packets_sent", "Number of UDP datagrams sent."), 298 PacketSendErrors: mustCreateMetric("/netstack/udp/packet_send_errors", "Number of UDP datagrams failed to be sent."), 299 ChecksumErrors: mustCreateMetric("/netstack/udp/checksum_errors", "Number of UDP datagrams dropped due to bad checksums."), 300 }, 301 } 302 303 // DefaultTTL is linux's default TTL. All network protocols in all stacks used 304 // with this package must have this value set as their default TTL. 305 const DefaultTTL = 64 306 307 const sizeOfInt32 int = 4 308 309 var errStackType = syserr.New("expected but did not receive a netstack.Stack", errno.EINVAL) 310 311 // commonEndpoint represents the intersection of a tcpip.Endpoint and a 312 // transport.Endpoint. 313 type commonEndpoint interface { 314 // Readiness implements tcpip.Endpoint.Readiness and 315 // transport.Endpoint.Readiness. 316 Readiness(mask waiter.EventMask) waiter.EventMask 317 318 // SetSockOpt implements tcpip.Endpoint.SetSockOpt and 319 // transport.Endpoint.SetSockOpt. 320 SetSockOpt(tcpip.SettableSocketOption) tcpip.Error 321 322 // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt and 323 // transport.Endpoint.SetSockOptInt. 
324 SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error 325 326 // GetSockOpt implements tcpip.Endpoint.GetSockOpt and 327 // transport.Endpoint.GetSockOpt. 328 GetSockOpt(tcpip.GettableSocketOption) tcpip.Error 329 330 // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt and 331 // transport.Endpoint.GetSockOpt. 332 GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) 333 334 // State returns a socket's lifecycle state. The returned value is 335 // protocol-specific and is primarily used for diagnostics. 336 State() uint32 337 338 // LastError implements tcpip.Endpoint.LastError and 339 // transport.Endpoint.LastError. 340 LastError() tcpip.Error 341 342 // SocketOptions implements tcpip.Endpoint.SocketOptions and 343 // transport.Endpoint.SocketOptions. 344 SocketOptions() *tcpip.SocketOptions 345 } 346 347 // sock encapsulates all the state needed to represent a network stack 348 // endpoint in the kernel context. 349 // 350 // +stateify savable 351 type sock struct { 352 vfsfd vfs.FileDescription 353 vfs.FileDescriptionDefaultImpl 354 vfs.DentryMetadataFileDescriptionImpl 355 vfs.LockFD 356 socket.SendReceiveTimeout 357 *waiter.Queue 358 359 family int 360 Endpoint tcpip.Endpoint 361 skType linux.SockType 362 protocol int 363 364 namespace *inet.Namespace 365 366 // readMu protects access to the below fields. 367 readMu sync.Mutex `state:"nosave"` 368 369 // sockOptTimestamp corresponds to SO_TIMESTAMP. When true, timestamps 370 // of returned messages can be returned via control messages. When 371 // false, the same timestamp is instead stored and can be read via the 372 // SIOCGSTAMP ioctl. It is protected by readMu. See socket(7). 373 sockOptTimestamp bool 374 // timestampValid indicates whether timestamp for SIOCGSTAMP has been 375 // set. It is protected by readMu. 376 timestampValid bool 377 // timestamp holds the timestamp to use with SIOCTSTAMP. It is only 378 // valid when timestampValid is true. It is protected by readMu. 
379 timestamp time.Time `state:".(int64)"` 380 381 // TODO(b/153685824): Move this to SocketOptions. 382 // sockOptInq corresponds to TCP_INQ. 383 sockOptInq bool 384 } 385 386 var _ = socket.Socket(&sock{}) 387 388 // New creates a new endpoint socket. 389 func New(t *kernel.Task, family int, skType linux.SockType, protocol int, queue *waiter.Queue, endpoint tcpip.Endpoint) (*vfs.FileDescription, *syserr.Error) { 390 if skType == linux.SOCK_STREAM { 391 endpoint.SocketOptions().SetDelayOption(true) 392 } 393 394 mnt := t.Kernel().SocketMount() 395 d := sockfs.NewDentry(t, mnt) 396 defer d.DecRef(t) 397 398 namespace := t.NetworkNamespace() 399 s := &sock{ 400 Queue: queue, 401 family: family, 402 Endpoint: endpoint, 403 skType: skType, 404 protocol: protocol, 405 namespace: namespace, 406 } 407 s.LockFD.Init(&vfs.FileLocks{}) 408 vfsfd := &s.vfsfd 409 if err := vfsfd.Init(s, linux.O_RDWR, mnt, d, &vfs.FileDescriptionOptions{ 410 DenyPRead: true, 411 DenyPWrite: true, 412 UseDentryMetadata: true, 413 }); err != nil { 414 return nil, syserr.FromError(err) 415 } 416 namespace.IncRef() 417 return vfsfd, nil 418 } 419 420 // Release implements vfs.FileDescriptionImpl.Release. 421 func (s *sock) Release(ctx context.Context) { 422 kernel.KernelFromContext(ctx).DeleteSocket(&s.vfsfd) 423 e, ch := waiter.NewChannelEntry(waiter.EventHUp | waiter.EventErr) 424 s.EventRegister(&e) 425 defer s.EventUnregister(&e) 426 427 s.Endpoint.Close() 428 429 // SO_LINGER option is valid only for TCP. For other socket types 430 // return after endpoint close. 431 if family, skType, _ := s.Type(); skType == linux.SOCK_STREAM && (family == linux.AF_INET || family == linux.AF_INET6) { 432 v := s.Endpoint.SocketOptions().GetLinger() 433 // The case for zero timeout is handled in tcp endpoint close function. 434 // Close is blocked until either: 435 // 1. The endpoint state is not in any of the states: FIN-WAIT1, 436 // CLOSING and LAST_ACK. 437 // 2. Timeout is reached. 
438 if v.Enabled && v.Timeout != 0 { 439 t := kernel.TaskFromContext(ctx) 440 start := t.Kernel().MonotonicClock().Now() 441 deadline := start.Add(v.Timeout) 442 _ = t.BlockWithDeadline(ch, true, deadline) 443 } 444 } 445 s.namespace.DecRef(ctx) 446 } 447 448 // Epollable implements FileDescriptionImpl.Epollable. 449 func (s *sock) Epollable() bool { 450 return true 451 } 452 453 // Read implements vfs.FileDescriptionImpl. 454 func (s *sock) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { 455 // All flags other than RWF_NOWAIT should be ignored. 456 // TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT. 457 if opts.Flags != 0 { 458 return 0, linuxerr.EOPNOTSUPP 459 } 460 461 if dst.NumBytes() == 0 { 462 return 0, nil 463 } 464 n, _, _, _, _, err := s.nonBlockingRead(ctx, dst, false, false, false) 465 if err == syserr.ErrWouldBlock { 466 return int64(n), linuxerr.ErrWouldBlock 467 } 468 if err != nil { 469 return 0, err.ToError() 470 } 471 return int64(n), nil 472 } 473 474 // Write implements vfs.FileDescriptionImpl. 475 func (s *sock) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { 476 // All flags other than RWF_NOWAIT should be ignored. 477 // TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT. 478 if opts.Flags != 0 { 479 return 0, linuxerr.EOPNOTSUPP 480 } 481 482 r := src.Reader(ctx) 483 n, err := s.Endpoint.Write(r, tcpip.WriteOptions{}) 484 if _, ok := err.(*tcpip.ErrWouldBlock); ok { 485 return 0, linuxerr.ErrWouldBlock 486 } 487 if err != nil { 488 return 0, syserr.TranslateNetstackError(err).ToError() 489 } 490 491 if n < src.NumBytes() { 492 return n, linuxerr.ErrWouldBlock 493 } 494 495 return n, nil 496 } 497 498 // Accept implements the linux syscall accept(2) for sockets backed by 499 // tcpip.Endpoint. 
500 func (s *sock) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) { 501 // Issue the accept request to get the new endpoint. 502 var peerAddr *tcpip.FullAddress 503 if peerRequested { 504 peerAddr = &tcpip.FullAddress{} 505 } 506 ep, wq, terr := s.Endpoint.Accept(peerAddr) 507 if terr != nil { 508 if _, ok := terr.(*tcpip.ErrWouldBlock); !ok || !blocking { 509 return 0, nil, 0, syserr.TranslateNetstackError(terr) 510 } 511 512 var err *syserr.Error 513 ep, wq, err = s.blockingAccept(t, peerAddr) 514 if err != nil { 515 return 0, nil, 0, err 516 } 517 } 518 519 ns, err := New(t, s.family, s.skType, s.protocol, wq, ep) 520 if err != nil { 521 return 0, nil, 0, err 522 } 523 defer ns.DecRef(t) 524 525 if err := ns.SetStatusFlags(t, t.Credentials(), uint32(flags&linux.SOCK_NONBLOCK)); err != nil { 526 return 0, nil, 0, syserr.FromError(err) 527 } 528 529 var addr linux.SockAddr 530 var addrLen uint32 531 if peerAddr != nil { 532 // Get address of the peer and write it to peer slice. 533 addr, addrLen = socket.ConvertAddress(s.family, *peerAddr) 534 } 535 536 fd, e := t.NewFDFrom(0, ns, kernel.FDFlags{ 537 CloseOnExec: flags&linux.SOCK_CLOEXEC != 0, 538 }) 539 540 t.Kernel().RecordSocket(ns) 541 542 return fd, addr, addrLen, syserr.FromError(e) 543 } 544 545 // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by 546 // tcpip.Endpoint. 547 func (s *sock) GetSockOpt(t *kernel.Task, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { 548 // TODO(b/78348848): Unlike other socket options, SO_TIMESTAMP is 549 // implemented specifically for netstack.Socket rather than 550 // commonEndpoint. commonEndpoint should be extended to support socket 551 // options where the implementation is not shared, as unix sockets need 552 // their own support for SO_TIMESTAMP. 
553 if level == linux.SOL_SOCKET && name == linux.SO_TIMESTAMP { 554 if outLen < sizeOfInt32 { 555 return nil, syserr.ErrInvalidArgument 556 } 557 val := primitive.Int32(0) 558 s.readMu.Lock() 559 defer s.readMu.Unlock() 560 if s.sockOptTimestamp { 561 val = 1 562 } 563 return &val, nil 564 } 565 if level == linux.SOL_TCP && name == linux.TCP_INQ { 566 if outLen < sizeOfInt32 { 567 return nil, syserr.ErrInvalidArgument 568 } 569 val := primitive.Int32(0) 570 s.readMu.Lock() 571 defer s.readMu.Unlock() 572 if s.sockOptInq { 573 val = 1 574 } 575 return &val, nil 576 } 577 578 return GetSockOpt(t, s, s.Endpoint, s.family, s.skType, level, name, outPtr, outLen) 579 } 580 581 // SetSockOpt implements the linux syscall setsockopt(2) for sockets backed by 582 // tcpip.Endpoint. 583 func (s *sock) SetSockOpt(t *kernel.Task, level int, name int, optVal []byte) *syserr.Error { 584 // TODO(b/78348848): Unlike other socket options, SO_TIMESTAMP is 585 // implemented specifically for netstack.Socket rather than 586 // commonEndpoint. commonEndpoint should be extended to support socket 587 // options where the implementation is not shared, as unix sockets need 588 // their own support for SO_TIMESTAMP. 
589 if level == linux.SOL_SOCKET && name == linux.SO_TIMESTAMP { 590 if len(optVal) < sizeOfInt32 { 591 return syserr.ErrInvalidArgument 592 } 593 s.readMu.Lock() 594 defer s.readMu.Unlock() 595 s.sockOptTimestamp = hostarch.ByteOrder.Uint32(optVal) != 0 596 return nil 597 } 598 if level == linux.SOL_TCP && name == linux.TCP_INQ { 599 if len(optVal) < sizeOfInt32 { 600 return syserr.ErrInvalidArgument 601 } 602 s.readMu.Lock() 603 defer s.readMu.Unlock() 604 s.sockOptInq = hostarch.ByteOrder.Uint32(optVal) != 0 605 return nil 606 } 607 608 return SetSockOpt(t, s, s.Endpoint, level, name, optVal) 609 } 610 611 var sockAddrInetSize = (*linux.SockAddrInet)(nil).SizeBytes() 612 var sockAddrInet6Size = (*linux.SockAddrInet6)(nil).SizeBytes() 613 var sockAddrLinkSize = (*linux.SockAddrLink)(nil).SizeBytes() 614 615 // minSockAddrLen returns the minimum length in bytes of a socket address for 616 // the socket's family. 617 func (s *sock) minSockAddrLen() int { 618 const addressFamilySize = 2 619 620 switch s.family { 621 case linux.AF_UNIX: 622 return addressFamilySize 623 case linux.AF_INET: 624 return sockAddrInetSize 625 case linux.AF_INET6: 626 return sockAddrInet6Size 627 case linux.AF_PACKET: 628 return sockAddrLinkSize 629 case linux.AF_UNSPEC: 630 return addressFamilySize 631 default: 632 panic(fmt.Sprintf("s.family unrecognized = %d", s.family)) 633 } 634 } 635 636 func (s *sock) isPacketBased() bool { 637 return s.skType == linux.SOCK_DGRAM || s.skType == linux.SOCK_SEQPACKET || s.skType == linux.SOCK_RDM || s.skType == linux.SOCK_RAW 638 } 639 640 // Readiness returns a mask of ready events for socket s. 641 func (s *sock) Readiness(mask waiter.EventMask) waiter.EventMask { 642 return s.Endpoint.Readiness(mask) 643 } 644 645 // checkFamily returns true iff the specified address family may be used with 646 // the socket. 647 // 648 // If exact is true, then the specified address family must be an exact match 649 // with the socket's family. 
650 func (s *sock) checkFamily(family uint16, exact bool) bool { 651 if family == uint16(s.family) { 652 return true 653 } 654 if !exact && family == linux.AF_INET && s.family == linux.AF_INET6 { 655 if !s.Endpoint.SocketOptions().GetV6Only() { 656 return true 657 } 658 } 659 return false 660 } 661 662 // mapFamily maps the AF_INET ANY address to the IPv4-mapped IPv6 ANY if the 663 // receiver's family is AF_INET6. 664 // 665 // This is a hack to work around the fact that both IPv4 and IPv6 ANY are 666 // represented by the empty string. 667 // 668 // TODO(gvisor.dev/issue/1556): remove this function. 669 func (s *sock) mapFamily(addr tcpip.FullAddress, family uint16) tcpip.FullAddress { 670 if addr.Addr.BitLen() == 0 && s.family == linux.AF_INET6 && family == linux.AF_INET { 671 addr.Addr = tcpip.AddrFrom16([16]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}) 672 } 673 return addr 674 } 675 676 // Connect implements the linux syscall connect(2) for sockets backed by 677 // tpcip.Endpoint. 678 func (s *sock) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error { 679 addr, family, err := socket.AddressAndFamily(sockaddr) 680 if err != nil { 681 return err 682 } 683 684 if family == linux.AF_UNSPEC { 685 err := s.Endpoint.Disconnect() 686 if _, ok := err.(*tcpip.ErrNotSupported); ok { 687 return syserr.ErrAddressFamilyNotSupported 688 } 689 return syserr.TranslateNetstackError(err) 690 } 691 692 if !s.checkFamily(family, false /* exact */) { 693 return syserr.ErrInvalidArgument 694 } 695 addr = s.mapFamily(addr, family) 696 697 // Always return right away in the non-blocking case. 698 if !blocking { 699 return syserr.TranslateNetstackError(s.Endpoint.Connect(addr)) 700 } 701 702 // Register for notification when the endpoint becomes writable, then 703 // initiate the connection. 
704 e, ch := waiter.NewChannelEntry(waiter.WritableEvents) 705 s.EventRegister(&e) 706 defer s.EventUnregister(&e) 707 708 switch err := s.Endpoint.Connect(addr); err.(type) { 709 case *tcpip.ErrConnectStarted, *tcpip.ErrAlreadyConnecting: 710 case *tcpip.ErrNoPortAvailable: 711 if (s.family == unix.AF_INET || s.family == unix.AF_INET6) && s.skType == linux.SOCK_STREAM { 712 // TCP unlike UDP returns EADDRNOTAVAIL when it can't 713 // find an available local ephemeral port. 714 return syserr.ErrAddressNotAvailable 715 } 716 return syserr.TranslateNetstackError(err) 717 default: 718 return syserr.TranslateNetstackError(err) 719 } 720 721 // It's pending, so we have to wait for a notification, and fetch the 722 // result once the wait completes. 723 if err := t.Block(ch); err != nil { 724 return syserr.FromError(err) 725 } 726 727 // Call Connect() again after blocking to find connect's result. 728 return syserr.TranslateNetstackError(s.Endpoint.Connect(addr)) 729 } 730 731 // Bind implements the linux syscall bind(2) for sockets backed by 732 // tcpip.Endpoint. 733 func (s *sock) Bind(_ *kernel.Task, sockaddr []byte) *syserr.Error { 734 if len(sockaddr) < 2 { 735 return syserr.ErrInvalidArgument 736 } 737 738 family := hostarch.ByteOrder.Uint16(sockaddr) 739 var addr tcpip.FullAddress 740 741 // Bind for AF_PACKET requires only family, protocol and ifindex. 742 // In function AddressAndFamily, we check the address length which is 743 // not needed for AF_PACKET bind. 
744 if family == linux.AF_PACKET { 745 var a linux.SockAddrLink 746 if len(sockaddr) < sockAddrLinkSize { 747 return syserr.ErrInvalidArgument 748 } 749 a.UnmarshalBytes(sockaddr) 750 751 addr = tcpip.FullAddress{ 752 NIC: tcpip.NICID(a.InterfaceIndex), 753 Addr: tcpip.AddrFrom16Slice(append( 754 a.HardwareAddr[:header.EthernetAddressSize], 755 []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}..., 756 )), 757 Port: socket.Ntohs(a.Protocol), 758 } 759 } else { 760 if s.minSockAddrLen() > len(sockaddr) { 761 return syserr.ErrInvalidArgument 762 } 763 764 var err *syserr.Error 765 addr, family, err = socket.AddressAndFamily(sockaddr) 766 if err != nil { 767 return err 768 } 769 770 if !s.checkFamily(family, true /* exact */) { 771 return syserr.ErrAddressFamilyNotSupported 772 } 773 774 addr = s.mapFamily(addr, family) 775 } 776 777 // Issue the bind request to the endpoint. 778 err := s.Endpoint.Bind(addr) 779 if _, ok := err.(*tcpip.ErrNoPortAvailable); ok { 780 // Bind always returns EADDRINUSE irrespective of if the specified port was 781 // already bound or if an ephemeral port was requested but none were 782 // available. 783 // 784 // *tcpip.ErrNoPortAvailable is mapped to EAGAIN in syserr package because 785 // UDP connect returns EAGAIN on ephemeral port exhaustion. 786 // 787 // TCP connect returns EADDRNOTAVAIL on ephemeral port exhaustion. 788 err = &tcpip.ErrPortInUse{} 789 } 790 791 return syserr.TranslateNetstackError(err) 792 } 793 794 // Listen implements the linux syscall listen(2) for sockets backed by 795 // tcpip.Endpoint. 796 func (s *sock) Listen(_ *kernel.Task, backlog int) *syserr.Error { 797 return syserr.TranslateNetstackError(s.Endpoint.Listen(backlog)) 798 } 799 800 // blockingAccept implements a blocking version of accept(2), that is, if no 801 // connections are ready to be accept, it will block until one becomes ready. 
802 func (s *sock) blockingAccept(t *kernel.Task, peerAddr *tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *syserr.Error) { 803 // Register for notifications. 804 e, ch := waiter.NewChannelEntry(waiter.ReadableEvents) 805 s.EventRegister(&e) 806 defer s.EventUnregister(&e) 807 808 // Try to accept the connection again; if it fails, then wait until we 809 // get a notification. 810 for { 811 ep, wq, err := s.Endpoint.Accept(peerAddr) 812 if _, ok := err.(*tcpip.ErrWouldBlock); !ok { 813 return ep, wq, syserr.TranslateNetstackError(err) 814 } 815 816 if err := t.Block(ch); err != nil { 817 return nil, nil, syserr.FromError(err) 818 } 819 } 820 } 821 822 // ConvertShutdown converts Linux shutdown flags into tcpip shutdown flags. 823 func ConvertShutdown(how int) (tcpip.ShutdownFlags, *syserr.Error) { 824 var f tcpip.ShutdownFlags 825 switch how { 826 case linux.SHUT_RD: 827 f = tcpip.ShutdownRead 828 case linux.SHUT_WR: 829 f = tcpip.ShutdownWrite 830 case linux.SHUT_RDWR: 831 f = tcpip.ShutdownRead | tcpip.ShutdownWrite 832 default: 833 return 0, syserr.ErrInvalidArgument 834 } 835 return f, nil 836 } 837 838 // Shutdown implements the linux syscall shutdown(2) for sockets backed by 839 // tcpip.Endpoint. 840 func (s *sock) Shutdown(_ *kernel.Task, how int) *syserr.Error { 841 f, err := ConvertShutdown(how) 842 if err != nil { 843 return err 844 } 845 846 // Issue shutdown request. 847 return syserr.TranslateNetstackError(s.Endpoint.Shutdown(f)) 848 } 849 850 // GetSockOpt can be used to implement the linux syscall getsockopt(2) for 851 // sockets backed by a commonEndpoint. 
852 func GetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int, skType linux.SockType, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { 853 switch level { 854 case linux.SOL_SOCKET: 855 return getSockOptSocket(t, s, ep, family, skType, name, outLen) 856 857 case linux.SOL_TCP: 858 return getSockOptTCP(t, s, ep, name, outLen) 859 860 case linux.SOL_IPV6: 861 return getSockOptIPv6(t, s, ep, name, outPtr, outLen) 862 863 case linux.SOL_IP: 864 return getSockOptIP(t, s, ep, name, outPtr, outLen, family) 865 866 case linux.SOL_ICMPV6: 867 return getSockOptICMPv6(t, s, ep, name, outLen) 868 869 case linux.SOL_UDP, 870 linux.SOL_RAW, 871 linux.SOL_PACKET: 872 // Not supported. 873 } 874 875 return nil, syserr.ErrProtocolNotAvailable 876 } 877 878 func boolToInt32(v bool) int32 { 879 if v { 880 return 1 881 } 882 return 0 883 } 884 885 // getSockOptSocket implements GetSockOpt when level is SOL_SOCKET. 886 func getSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int, _ linux.SockType, name, outLen int) (marshal.Marshallable, *syserr.Error) { 887 // TODO(b/124056281): Stop rejecting short optLen values in getsockopt. 888 switch name { 889 case linux.SO_ERROR: 890 if outLen < sizeOfInt32 { 891 return nil, syserr.ErrInvalidArgument 892 } 893 894 // Get the last error and convert it. 
895 err := ep.SocketOptions().GetLastError() 896 if err == nil { 897 optP := primitive.Int32(0) 898 return &optP, nil 899 } 900 901 optP := primitive.Int32(syserr.TranslateNetstackError(err).ToLinux()) 902 return &optP, nil 903 904 case linux.SO_PEERCRED: 905 if family != linux.AF_UNIX || outLen < unix.SizeofUcred { 906 return nil, syserr.ErrInvalidArgument 907 } 908 909 tcred := t.Credentials() 910 creds := linux.ControlMessageCredentials{ 911 PID: int32(t.ThreadGroup().ID()), 912 UID: uint32(tcred.EffectiveKUID.In(tcred.UserNamespace).OrOverflow()), 913 GID: uint32(tcred.EffectiveKGID.In(tcred.UserNamespace).OrOverflow()), 914 } 915 return &creds, nil 916 917 case linux.SO_PASSCRED: 918 if outLen < sizeOfInt32 { 919 return nil, syserr.ErrInvalidArgument 920 } 921 922 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetPassCred())) 923 return &v, nil 924 925 case linux.SO_SNDBUF: 926 if outLen < sizeOfInt32 { 927 return nil, syserr.ErrInvalidArgument 928 } 929 930 size := ep.SocketOptions().GetSendBufferSize() 931 932 if size > math.MaxInt32 { 933 size = math.MaxInt32 934 } 935 936 sizeP := primitive.Int32(size) 937 return &sizeP, nil 938 939 case linux.SO_RCVBUF: 940 if outLen < sizeOfInt32 { 941 return nil, syserr.ErrInvalidArgument 942 } 943 944 size := ep.SocketOptions().GetReceiveBufferSize() 945 946 if size > math.MaxInt32 { 947 size = math.MaxInt32 948 } 949 950 sizeP := primitive.Int32(size) 951 return &sizeP, nil 952 953 case linux.SO_REUSEADDR: 954 if outLen < sizeOfInt32 { 955 return nil, syserr.ErrInvalidArgument 956 } 957 958 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReuseAddress())) 959 return &v, nil 960 961 case linux.SO_REUSEPORT: 962 if outLen < sizeOfInt32 { 963 return nil, syserr.ErrInvalidArgument 964 } 965 966 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReusePort())) 967 return &v, nil 968 969 case linux.SO_BINDTODEVICE: 970 v := ep.SocketOptions().GetBindToDevice() 971 if v == 0 { 972 var b primitive.ByteSlice 973 
return &b, nil 974 } 975 if outLen < linux.IFNAMSIZ { 976 return nil, syserr.ErrInvalidArgument 977 } 978 s := t.NetworkContext() 979 if s == nil { 980 return nil, syserr.ErrNoDevice 981 } 982 nic, ok := s.Interfaces()[int32(v)] 983 if !ok { 984 // The NICID no longer indicates a valid interface, probably because that 985 // interface was removed. 986 return nil, syserr.ErrUnknownDevice 987 } 988 989 name := primitive.ByteSlice(append([]byte(nic.Name), 0)) 990 return &name, nil 991 992 case linux.SO_BROADCAST: 993 if outLen < sizeOfInt32 { 994 return nil, syserr.ErrInvalidArgument 995 } 996 997 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetBroadcast())) 998 return &v, nil 999 1000 case linux.SO_KEEPALIVE: 1001 if outLen < sizeOfInt32 { 1002 return nil, syserr.ErrInvalidArgument 1003 } 1004 1005 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetKeepAlive())) 1006 return &v, nil 1007 1008 case linux.SO_LINGER: 1009 if outLen < linux.SizeOfLinger { 1010 return nil, syserr.ErrInvalidArgument 1011 } 1012 1013 var linger linux.Linger 1014 v := ep.SocketOptions().GetLinger() 1015 1016 if v.Enabled { 1017 linger.OnOff = 1 1018 } 1019 linger.Linger = int32(v.Timeout.Seconds()) 1020 return &linger, nil 1021 1022 case linux.SO_SNDTIMEO: 1023 // TODO(igudger): Linux allows shorter lengths for partial results. 1024 if outLen < linux.SizeOfTimeval { 1025 return nil, syserr.ErrInvalidArgument 1026 } 1027 1028 sendTimeout := linux.NsecToTimeval(s.SendTimeout()) 1029 return &sendTimeout, nil 1030 1031 case linux.SO_RCVTIMEO: 1032 // TODO(igudger): Linux allows shorter lengths for partial results. 
1033 if outLen < linux.SizeOfTimeval { 1034 return nil, syserr.ErrInvalidArgument 1035 } 1036 1037 recvTimeout := linux.NsecToTimeval(s.RecvTimeout()) 1038 return &recvTimeout, nil 1039 1040 case linux.SO_OOBINLINE: 1041 if outLen < sizeOfInt32 { 1042 return nil, syserr.ErrInvalidArgument 1043 } 1044 1045 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetOutOfBandInline())) 1046 return &v, nil 1047 1048 case linux.SO_NO_CHECK: 1049 if outLen < sizeOfInt32 { 1050 return nil, syserr.ErrInvalidArgument 1051 } 1052 1053 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetNoChecksum())) 1054 return &v, nil 1055 1056 case linux.SO_ACCEPTCONN: 1057 if outLen < sizeOfInt32 { 1058 return nil, syserr.ErrInvalidArgument 1059 } 1060 1061 // This option is only viable for TCP endpoints. 1062 var v bool 1063 if socket.IsTCP(s) { 1064 v = tcp.EndpointState(ep.State()) == tcp.StateListen 1065 } 1066 vP := primitive.Int32(boolToInt32(v)) 1067 return &vP, nil 1068 1069 case linux.SO_RCVLOWAT: 1070 if outLen < sizeOfInt32 { 1071 return nil, syserr.ErrInvalidArgument 1072 } 1073 1074 v := primitive.Int32(ep.SocketOptions().GetRcvlowat()) 1075 return &v, nil 1076 } 1077 return nil, syserr.ErrProtocolNotAvailable 1078 } 1079 1080 // getSockOptTCP implements GetSockOpt when level is SOL_TCP. 
1081 func getSockOptTCP(t *kernel.Task, s socket.Socket, ep commonEndpoint, name, outLen int) (marshal.Marshallable, *syserr.Error) { 1082 if !socket.IsTCP(s) { 1083 return nil, syserr.ErrUnknownProtocolOption 1084 } 1085 1086 switch name { 1087 case linux.TCP_NODELAY: 1088 if outLen < sizeOfInt32 { 1089 return nil, syserr.ErrInvalidArgument 1090 } 1091 1092 v := primitive.Int32(boolToInt32(!ep.SocketOptions().GetDelayOption())) 1093 return &v, nil 1094 1095 case linux.TCP_CORK: 1096 if outLen < sizeOfInt32 { 1097 return nil, syserr.ErrInvalidArgument 1098 } 1099 1100 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetCorkOption())) 1101 return &v, nil 1102 1103 case linux.TCP_QUICKACK: 1104 if outLen < sizeOfInt32 { 1105 return nil, syserr.ErrInvalidArgument 1106 } 1107 1108 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetQuickAck())) 1109 return &v, nil 1110 1111 case linux.TCP_MAXSEG: 1112 if outLen < sizeOfInt32 { 1113 return nil, syserr.ErrInvalidArgument 1114 } 1115 1116 v, err := ep.GetSockOptInt(tcpip.MaxSegOption) 1117 if err != nil { 1118 return nil, syserr.TranslateNetstackError(err) 1119 } 1120 vP := primitive.Int32(v) 1121 return &vP, nil 1122 1123 case linux.TCP_KEEPIDLE: 1124 if outLen < sizeOfInt32 { 1125 return nil, syserr.ErrInvalidArgument 1126 } 1127 1128 var v tcpip.KeepaliveIdleOption 1129 if err := ep.GetSockOpt(&v); err != nil { 1130 return nil, syserr.TranslateNetstackError(err) 1131 } 1132 keepAliveIdle := primitive.Int32(time.Duration(v) / time.Second) 1133 return &keepAliveIdle, nil 1134 1135 case linux.TCP_KEEPINTVL: 1136 if outLen < sizeOfInt32 { 1137 return nil, syserr.ErrInvalidArgument 1138 } 1139 1140 var v tcpip.KeepaliveIntervalOption 1141 if err := ep.GetSockOpt(&v); err != nil { 1142 return nil, syserr.TranslateNetstackError(err) 1143 } 1144 keepAliveInterval := primitive.Int32(time.Duration(v) / time.Second) 1145 return &keepAliveInterval, nil 1146 1147 case linux.TCP_KEEPCNT: 1148 if outLen < sizeOfInt32 { 1149 
return nil, syserr.ErrInvalidArgument 1150 } 1151 1152 v, err := ep.GetSockOptInt(tcpip.KeepaliveCountOption) 1153 if err != nil { 1154 return nil, syserr.TranslateNetstackError(err) 1155 } 1156 vP := primitive.Int32(v) 1157 return &vP, nil 1158 1159 case linux.TCP_USER_TIMEOUT: 1160 if outLen < sizeOfInt32 { 1161 return nil, syserr.ErrInvalidArgument 1162 } 1163 1164 var v tcpip.TCPUserTimeoutOption 1165 if err := ep.GetSockOpt(&v); err != nil { 1166 return nil, syserr.TranslateNetstackError(err) 1167 } 1168 tcpUserTimeout := primitive.Int32(time.Duration(v) / time.Millisecond) 1169 return &tcpUserTimeout, nil 1170 1171 case linux.TCP_INFO: 1172 var v tcpip.TCPInfoOption 1173 if err := ep.GetSockOpt(&v); err != nil { 1174 return nil, syserr.TranslateNetstackError(err) 1175 } 1176 1177 // TODO(b/64800844): Translate fields once they are added to 1178 // tcpip.TCPInfoOption. 1179 info := linux.TCPInfo{ 1180 State: uint8(v.State), 1181 RTO: uint32(v.RTO / time.Microsecond), 1182 RTT: uint32(v.RTT / time.Microsecond), 1183 RTTVar: uint32(v.RTTVar / time.Microsecond), 1184 SndSsthresh: v.SndSsthresh, 1185 SndCwnd: v.SndCwnd, 1186 } 1187 switch v.CcState { 1188 case tcpip.RTORecovery: 1189 info.CaState = linux.TCP_CA_Loss 1190 case tcpip.FastRecovery, tcpip.SACKRecovery: 1191 info.CaState = linux.TCP_CA_Recovery 1192 case tcpip.Disorder: 1193 info.CaState = linux.TCP_CA_Disorder 1194 case tcpip.Open: 1195 info.CaState = linux.TCP_CA_Open 1196 } 1197 1198 // In netstack reorderSeen is updated only when RACK is enabled. 1199 // We only track whether the reordering is seen, which is 1200 // different than Linux where reorderSeen is not specific to 1201 // RACK and is incremented when a reordering event is seen. 1202 if v.ReorderSeen { 1203 info.ReordSeen = 1 1204 } 1205 1206 // Linux truncates the output binary to outLen. 
1207 buf := t.CopyScratchBuffer(info.SizeBytes()) 1208 info.MarshalUnsafe(buf) 1209 if len(buf) > outLen { 1210 buf = buf[:outLen] 1211 } 1212 bufP := primitive.ByteSlice(buf) 1213 return &bufP, nil 1214 1215 case linux.TCP_CC_INFO, 1216 linux.TCP_NOTSENT_LOWAT, 1217 linux.TCP_ZEROCOPY_RECEIVE: 1218 1219 // Not supported. 1220 1221 case linux.TCP_CONGESTION: 1222 if outLen <= 0 { 1223 return nil, syserr.ErrInvalidArgument 1224 } 1225 1226 var v tcpip.CongestionControlOption 1227 if err := ep.GetSockOpt(&v); err != nil { 1228 return nil, syserr.TranslateNetstackError(err) 1229 } 1230 1231 // We match linux behaviour here where it returns the lower of 1232 // TCP_CA_NAME_MAX bytes or the value of the option length. 1233 // 1234 // This is Linux's net/tcp.h TCP_CA_NAME_MAX. 1235 const tcpCANameMax = 16 1236 1237 toCopy := tcpCANameMax 1238 if outLen < tcpCANameMax { 1239 toCopy = outLen 1240 } 1241 b := make([]byte, toCopy) 1242 copy(b, v) 1243 1244 bP := primitive.ByteSlice(b) 1245 return &bP, nil 1246 1247 case linux.TCP_LINGER2: 1248 if outLen < sizeOfInt32 { 1249 return nil, syserr.ErrInvalidArgument 1250 } 1251 1252 var v tcpip.TCPLingerTimeoutOption 1253 if err := ep.GetSockOpt(&v); err != nil { 1254 return nil, syserr.TranslateNetstackError(err) 1255 } 1256 var lingerTimeout primitive.Int32 1257 if v >= 0 { 1258 lingerTimeout = primitive.Int32(time.Duration(v) / time.Second) 1259 } else { 1260 lingerTimeout = -1 1261 } 1262 return &lingerTimeout, nil 1263 1264 case linux.TCP_DEFER_ACCEPT: 1265 if outLen < sizeOfInt32 { 1266 return nil, syserr.ErrInvalidArgument 1267 } 1268 1269 var v tcpip.TCPDeferAcceptOption 1270 if err := ep.GetSockOpt(&v); err != nil { 1271 return nil, syserr.TranslateNetstackError(err) 1272 } 1273 1274 tcpDeferAccept := primitive.Int32(time.Duration(v) / time.Second) 1275 return &tcpDeferAccept, nil 1276 1277 case linux.TCP_SYNCNT: 1278 if outLen < sizeOfInt32 { 1279 return nil, syserr.ErrInvalidArgument 1280 } 1281 1282 v, err := 
ep.GetSockOptInt(tcpip.TCPSynCountOption) 1283 if err != nil { 1284 return nil, syserr.TranslateNetstackError(err) 1285 } 1286 vP := primitive.Int32(v) 1287 return &vP, nil 1288 1289 case linux.TCP_WINDOW_CLAMP: 1290 if outLen < sizeOfInt32 { 1291 return nil, syserr.ErrInvalidArgument 1292 } 1293 1294 v, err := ep.GetSockOptInt(tcpip.TCPWindowClampOption) 1295 if err != nil { 1296 return nil, syserr.TranslateNetstackError(err) 1297 } 1298 vP := primitive.Int32(v) 1299 return &vP, nil 1300 } 1301 return nil, syserr.ErrProtocolNotAvailable 1302 } 1303 1304 func getSockOptICMPv6(t *kernel.Task, s socket.Socket, ep commonEndpoint, name int, outLen int) (marshal.Marshallable, *syserr.Error) { 1305 if _, ok := ep.(tcpip.Endpoint); !ok { 1306 log.Warningf("SOL_ICMPV6 options not supported on endpoints other than tcpip.Endpoint: option = %d", name) 1307 return nil, syserr.ErrUnknownProtocolOption 1308 } 1309 1310 if family, _, _ := s.Type(); family != linux.AF_INET6 { 1311 return nil, syserr.ErrNotSupported 1312 } 1313 1314 switch name { 1315 case linux.ICMPV6_FILTER: 1316 var v tcpip.ICMPv6Filter 1317 if err := ep.GetSockOpt(&v); err != nil { 1318 return nil, syserr.TranslateNetstackError(err) 1319 } 1320 1321 filter := linux.ICMP6Filter{Filter: v.DenyType} 1322 1323 // Linux truncates the output to outLen. 
1324 buf := t.CopyScratchBuffer(filter.SizeBytes()) 1325 filter.MarshalUnsafe(buf) 1326 if len(buf) > outLen { 1327 buf = buf[:outLen] 1328 } 1329 bufP := primitive.ByteSlice(buf) 1330 return &bufP, nil 1331 } 1332 return nil, syserr.ErrProtocolNotAvailable 1333 } 1334 1335 func defaultTTL(t *kernel.Task, network tcpip.NetworkProtocolNumber) (primitive.Int32, tcpip.Error) { 1336 var opt tcpip.DefaultTTLOption 1337 stack := inet.StackFromContext(t) 1338 if err := stack.(*Stack).Stack.NetworkProtocolOption(network, &opt); err != nil { 1339 return 0, err 1340 } 1341 return primitive.Int32(opt), nil 1342 } 1343 1344 // getSockOptIPv6 implements GetSockOpt when level is SOL_IPV6. 1345 func getSockOptIPv6(t *kernel.Task, s socket.Socket, ep commonEndpoint, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { 1346 if _, ok := ep.(tcpip.Endpoint); !ok { 1347 log.Warningf("SOL_IPV6 options not supported on endpoints other than tcpip.Endpoint: option = %d", name) 1348 return nil, syserr.ErrUnknownProtocolOption 1349 } 1350 1351 family, skType, _ := s.Type() 1352 if family != linux.AF_INET6 { 1353 return nil, syserr.ErrNotSupported 1354 } 1355 1356 switch name { 1357 case linux.IPV6_CHECKSUM: 1358 if outLen < sizeOfInt32 { 1359 return nil, syserr.ErrInvalidArgument 1360 } 1361 1362 v, err := ep.GetSockOptInt(tcpip.IPv6Checksum) 1363 if err != nil { 1364 return nil, syserr.TranslateNetstackError(err) 1365 } 1366 1367 vP := primitive.Int32(v) 1368 return &vP, nil 1369 1370 case linux.IPV6_V6ONLY: 1371 if outLen < sizeOfInt32 { 1372 return nil, syserr.ErrInvalidArgument 1373 } 1374 1375 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetV6Only())) 1376 return &v, nil 1377 1378 case linux.IPV6_UNICAST_HOPS: 1379 if outLen < sizeOfInt32 { 1380 return nil, syserr.ErrInvalidArgument 1381 } 1382 1383 v, err := ep.GetSockOptInt(tcpip.IPv6HopLimitOption) 1384 if err != nil { 1385 return nil, syserr.TranslateNetstackError(err) 1386 } 1387 1388 // Fill 
in the default value, if needed. 1389 vP := primitive.Int32(v) 1390 if vP == -1 { 1391 vP, err = defaultTTL(t, header.IPv6ProtocolNumber) 1392 if err != nil { 1393 return nil, syserr.TranslateNetstackError(err) 1394 } 1395 } 1396 1397 return &vP, nil 1398 1399 case linux.IPV6_RECVHOPLIMIT: 1400 if outLen < sizeOfInt32 { 1401 return nil, syserr.ErrInvalidArgument 1402 } 1403 1404 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReceiveHopLimit())) 1405 return &v, nil 1406 1407 case linux.IPV6_PATHMTU: 1408 // Not supported. 1409 1410 case linux.IPV6_TCLASS: 1411 // Length handling for parity with Linux. 1412 if outLen == 0 { 1413 var b primitive.ByteSlice 1414 return &b, nil 1415 } 1416 v, err := ep.GetSockOptInt(tcpip.IPv6TrafficClassOption) 1417 if err != nil { 1418 return nil, syserr.TranslateNetstackError(err) 1419 } 1420 1421 uintv := primitive.Uint32(v) 1422 // Linux truncates the output binary to outLen. 1423 ib := t.CopyScratchBuffer(uintv.SizeBytes()) 1424 uintv.MarshalUnsafe(ib) 1425 // Handle cases where outLen is lesser than sizeOfInt32. 
1426 if len(ib) > outLen { 1427 ib = ib[:outLen] 1428 } 1429 ibP := primitive.ByteSlice(ib) 1430 return &ibP, nil 1431 1432 case linux.IPV6_RECVTCLASS: 1433 if outLen < sizeOfInt32 { 1434 return nil, syserr.ErrInvalidArgument 1435 } 1436 1437 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReceiveTClass())) 1438 return &v, nil 1439 case linux.IPV6_RECVERR: 1440 if outLen < sizeOfInt32 { 1441 return nil, syserr.ErrInvalidArgument 1442 } 1443 1444 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetIPv6RecvError())) 1445 return &v, nil 1446 1447 case linux.IPV6_RECVORIGDSTADDR: 1448 if outLen < sizeOfInt32 { 1449 return nil, syserr.ErrInvalidArgument 1450 } 1451 1452 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReceiveOriginalDstAddress())) 1453 return &v, nil 1454 1455 case linux.IPV6_RECVPKTINFO: 1456 if outLen < sizeOfInt32 { 1457 return nil, syserr.ErrInvalidArgument 1458 } 1459 1460 v := primitive.Int32(boolToInt32(ep.SocketOptions().GetIPv6ReceivePacketInfo())) 1461 return &v, nil 1462 1463 case linux.IP6T_ORIGINAL_DST: 1464 if outLen < sockAddrInet6Size { 1465 return nil, syserr.ErrInvalidArgument 1466 } 1467 1468 var v tcpip.OriginalDestinationOption 1469 if err := ep.GetSockOpt(&v); err != nil { 1470 return nil, syserr.TranslateNetstackError(err) 1471 } 1472 1473 a, _ := socket.ConvertAddress(linux.AF_INET6, tcpip.FullAddress(v)) 1474 return a.(*linux.SockAddrInet6), nil 1475 1476 case linux.IP6T_SO_GET_INFO: 1477 if outLen < linux.SizeOfIPTGetinfo { 1478 return nil, syserr.ErrInvalidArgument 1479 } 1480 1481 // Only valid for raw IPv6 sockets. 
1482 if skType != linux.SOCK_RAW { 1483 return nil, syserr.ErrProtocolNotAvailable 1484 } 1485 1486 stk := inet.StackFromContext(t) 1487 if stk == nil { 1488 return nil, syserr.ErrNoDevice 1489 } 1490 info, err := netfilter.GetInfo(t, stk.(*Stack).Stack, outPtr, true) 1491 if err != nil { 1492 return nil, err 1493 } 1494 return &info, nil 1495 1496 case linux.IP6T_SO_GET_ENTRIES: 1497 // IPTGetEntries is reused for IPv6. 1498 if outLen < linux.SizeOfIPTGetEntries { 1499 return nil, syserr.ErrInvalidArgument 1500 } 1501 // Only valid for raw IPv6 sockets. 1502 if skType != linux.SOCK_RAW { 1503 return nil, syserr.ErrProtocolNotAvailable 1504 } 1505 1506 stk := inet.StackFromContext(t) 1507 if stk == nil { 1508 return nil, syserr.ErrNoDevice 1509 } 1510 entries, err := netfilter.GetEntries6(t, stk.(*Stack).Stack, outPtr, outLen) 1511 if err != nil { 1512 return nil, err 1513 } 1514 return &entries, nil 1515 1516 case linux.IP6T_SO_GET_REVISION_TARGET: 1517 if outLen < linux.SizeOfXTGetRevision { 1518 return nil, syserr.ErrInvalidArgument 1519 } 1520 1521 // Only valid for raw IPv6 sockets. 1522 if skType != linux.SOCK_RAW { 1523 return nil, syserr.ErrProtocolNotAvailable 1524 } 1525 1526 stk := inet.StackFromContext(t) 1527 if stk == nil { 1528 return nil, syserr.ErrNoDevice 1529 } 1530 ret, err := netfilter.TargetRevision(t, outPtr, header.IPv6ProtocolNumber) 1531 if err != nil { 1532 return nil, err 1533 } 1534 return &ret, nil 1535 } 1536 return nil, syserr.ErrProtocolNotAvailable 1537 } 1538 1539 // getSockOptIP implements GetSockOpt when level is SOL_IP. 
//
// ep must also implement tcpip.Endpoint; otherwise a warning is logged and
// ErrUnknownProtocolOption is returned. outLen is the size of the caller's
// output buffer: options reported as a 32-bit integer fail with
// ErrInvalidArgument when outLen is smaller than sizeOfInt32. Option names
// that have no case below yield ErrProtocolNotAvailable.
func getSockOptIP(t *kernel.Task, s socket.Socket, ep commonEndpoint, name int, outPtr hostarch.Addr, outLen int, _ int) (marshal.Marshallable, *syserr.Error) {
	if _, ok := ep.(tcpip.Endpoint); !ok {
		log.Warningf("SOL_IP options not supported on endpoints other than tcpip.Endpoint: option = %d", name)
		return nil, syserr.ErrUnknownProtocolOption
	}

	switch name {
	case linux.IP_TTL:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v, err := ep.GetSockOptInt(tcpip.IPv4TTLOption)
		if err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}

		// Fill in the default value, if needed. A stored value of zero is
		// replaced by whatever defaultTTL reports for IPv4.
		vP := primitive.Int32(v)
		if vP == 0 {
			vP, err = defaultTTL(t, header.IPv4ProtocolNumber)
			if err != nil {
				return nil, syserr.TranslateNetstackError(err)
			}
		}

		return &vP, nil

	case linux.IP_RECVTTL:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReceiveTTL()))
		return &v, nil

	case linux.IP_MULTICAST_TTL:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v, err := ep.GetSockOptInt(tcpip.MulticastTTLOption)
		if err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}

		vP := primitive.Int32(v)
		return &vP, nil

	case linux.IP_MULTICAST_IF:
		// The result is a bare linux.InetAddr, so that is the minimum buffer
		// size required here.
		if outLen < len(linux.InetAddr{}) {
			return nil, syserr.ErrInvalidArgument
		}

		var v tcpip.MulticastInterfaceOption
		if err := ep.GetSockOpt(&v); err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}

		a, _ := socket.ConvertAddress(linux.AF_INET, tcpip.FullAddress{Addr: v.InterfaceAddr})

		// Only the address portion of the converted sockaddr is returned.
		return &a.(*linux.SockAddrInet).Addr, nil

	case linux.IP_MULTICAST_LOOP:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetMulticastLoop()))
		return &v, nil

	case linux.IP_TOS:
		// Length handling for parity with Linux: a zero-length buffer gets an
		// empty result, a buffer shorter than an int32 gets a single byte, and
		// anything larger gets the full 32-bit value.
		if outLen == 0 {
			var b primitive.ByteSlice
			return &b, nil
		}
		v, err := ep.GetSockOptInt(tcpip.IPv4TOSOption)
		if err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}
		if outLen < sizeOfInt32 {
			vP := primitive.Uint8(v)
			return &vP, nil
		}
		vP := primitive.Int32(v)
		return &vP, nil

	case linux.IP_RECVTOS:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReceiveTOS()))
		return &v, nil

	case linux.IP_RECVERR:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetIPv4RecvError()))
		return &v, nil

	case linux.IP_PKTINFO:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReceivePacketInfo()))
		return &v, nil

	case linux.IP_HDRINCL:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetHeaderIncluded()))
		return &v, nil

	case linux.IP_RECVORIGDSTADDR:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReceiveOriginalDstAddress()))
		return &v, nil

	case linux.SO_ORIGINAL_DST:
		if outLen < sockAddrInetSize {
			return nil, syserr.ErrInvalidArgument
		}

		var v tcpip.OriginalDestinationOption
		if err := ep.GetSockOpt(&v); err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}

		a, _ := socket.ConvertAddress(linux.AF_INET, tcpip.FullAddress(v))
		return a.(*linux.SockAddrInet), nil

	case linux.IPT_SO_GET_INFO:
		if outLen < linux.SizeOfIPTGetinfo {
			return nil, syserr.ErrInvalidArgument
		}

		// Only valid for raw IPv4 sockets.
		if family, skType, _ := s.Type(); family != linux.AF_INET || skType != linux.SOCK_RAW {
			return nil, syserr.ErrProtocolNotAvailable
		}

		stk := inet.StackFromContext(t)
		if stk == nil {
			return nil, syserr.ErrNoDevice
		}
		// The final argument selects IPv6 tables; false means IPv4 here
		// (the SOL_IPV6 counterpart passes true).
		info, err := netfilter.GetInfo(t, stk.(*Stack).Stack, outPtr, false)
		if err != nil {
			return nil, err
		}
		return &info, nil

	case linux.IPT_SO_GET_ENTRIES:
		if outLen < linux.SizeOfIPTGetEntries {
			return nil, syserr.ErrInvalidArgument
		}

		// Only valid for raw IPv4 sockets.
		if family, skType, _ := s.Type(); family != linux.AF_INET || skType != linux.SOCK_RAW {
			return nil, syserr.ErrProtocolNotAvailable
		}

		stk := inet.StackFromContext(t)
		if stk == nil {
			return nil, syserr.ErrNoDevice
		}
		entries, err := netfilter.GetEntries4(t, stk.(*Stack).Stack, outPtr, outLen)
		if err != nil {
			return nil, err
		}
		return &entries, nil

	case linux.IPT_SO_GET_REVISION_TARGET:
		if outLen < linux.SizeOfXTGetRevision {
			return nil, syserr.ErrInvalidArgument
		}

		// Only valid for raw IPv4 sockets.
		if family, skType, _ := s.Type(); family != linux.AF_INET || skType != linux.SOCK_RAW {
			return nil, syserr.ErrProtocolNotAvailable
		}

		// The stack is only checked for presence; TargetRevision itself does
		// not take it as an argument.
		stk := inet.StackFromContext(t)
		if stk == nil {
			return nil, syserr.ErrNoDevice
		}
		ret, err := netfilter.TargetRevision(t, outPtr, header.IPv4ProtocolNumber)
		if err != nil {
			return nil, err
		}
		return &ret, nil
	}
	return nil, syserr.ErrProtocolNotAvailable
}

// SetSockOpt can be used to implement the linux syscall setsockopt(2) for
// sockets backed by a commonEndpoint.
1746 func SetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, level int, name int, optVal []byte) *syserr.Error { 1747 switch level { 1748 case linux.SOL_SOCKET: 1749 return setSockOptSocket(t, s, ep, name, optVal) 1750 1751 case linux.SOL_TCP: 1752 return setSockOptTCP(t, s, ep, name, optVal) 1753 1754 case linux.SOL_ICMPV6: 1755 return setSockOptICMPv6(t, s, ep, name, optVal) 1756 1757 case linux.SOL_IPV6: 1758 return setSockOptIPv6(t, s, ep, name, optVal) 1759 1760 case linux.SOL_IP: 1761 return setSockOptIP(t, s, ep, name, optVal) 1762 1763 case linux.SOL_PACKET: 1764 // gVisor doesn't support any SOL_PACKET options just return not 1765 // supported. Returning nil here will result in tcpdump thinking AF_PACKET 1766 // features are supported and proceed to use them and break. 1767 return syserr.ErrProtocolNotAvailable 1768 1769 case linux.SOL_UDP, 1770 linux.SOL_RAW: 1771 // Not supported. 1772 } 1773 1774 return nil 1775 } 1776 1777 func clampBufSize(newSz, min, max int64, ignoreMax bool) int64 { 1778 // packetOverheadFactor is used to multiply the value provided by the user on 1779 // a setsockopt(2) for setting the send/receive buffer sizes sockets. 1780 const packetOverheadFactor = 2 1781 1782 if !ignoreMax && newSz > max { 1783 newSz = max 1784 } 1785 1786 if newSz < math.MaxInt32/packetOverheadFactor { 1787 newSz *= packetOverheadFactor 1788 if newSz < min { 1789 newSz = min 1790 } 1791 } else { 1792 newSz = math.MaxInt32 1793 } 1794 return newSz 1795 } 1796 1797 // setSockOptSocket implements SetSockOpt when level is SOL_SOCKET. 
//
// Boolean and integer options decode a 32-bit value from the start of optVal
// using hostarch.ByteOrder and fail with ErrInvalidArgument when optVal is
// shorter than sizeOfInt32. Option names that have no case below are accepted
// and ignored (nil is returned).
func setSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
	switch name {
	case linux.SO_SNDBUF:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		min, max := ep.SocketOptions().SendBufferLimits()
		clamped := clampBufSize(int64(v), min, max, false /* ignoreMax */)
		ep.SocketOptions().SetSendBufferSize(clamped, true /* notify */)
		return nil

	case linux.SO_RCVBUF:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		min, max := ep.SocketOptions().ReceiveBufferLimits()
		clamped := clampBufSize(int64(v), min, max, false /* ignoreMax */)
		ep.SocketOptions().SetReceiveBufferSize(clamped, true /* notify */)
		return nil

	case linux.SO_RCVBUFFORCE:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		// Bypassing the receive buffer maximum requires CAP_NET_ADMIN.
		if creds := auth.CredentialsFromContext(t); !creds.HasCapability(linux.CAP_NET_ADMIN) {
			return syserr.ErrNotPermitted
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		min, max := ep.SocketOptions().ReceiveBufferLimits()
		clamped := clampBufSize(int64(v), min, max, true /* ignoreMax */)
		ep.SocketOptions().SetReceiveBufferSize(clamped, true /* notify */)
		return nil

	case linux.SO_REUSEADDR:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		ep.SocketOptions().SetReuseAddress(v != 0)
		return nil

	case linux.SO_REUSEPORT:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		ep.SocketOptions().SetReusePort(v != 0)
		return nil

	case linux.SO_BINDTODEVICE:
		// The device name is the bytes of optVal up to the first NUL, or all
		// of optVal when it contains no NUL.
		n := bytes.IndexByte(optVal, 0)
		if n == -1 {
			n = len(optVal)
		}
		// NOTE: name and s below shadow the function's parameters of the same
		// names for the remainder of this case.
		name := string(optVal[:n])
		if name == "" {
			// An empty name clears the binding (NIC ID 0).
			return syserr.TranslateNetstackError(ep.SocketOptions().SetBindToDevice(0))
		}
		s := t.NetworkContext()
		if s == nil {
			return syserr.ErrNoDevice
		}
		for nicID, nic := range s.Interfaces() {
			if nic.Name == name {
				return syserr.TranslateNetstackError(ep.SocketOptions().SetBindToDevice(nicID))
			}
		}
		return syserr.ErrUnknownDevice

	case linux.SO_BROADCAST:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		ep.SocketOptions().SetBroadcast(v != 0)
		return nil

	case linux.SO_PASSCRED:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		ep.SocketOptions().SetPassCred(v != 0)
		return nil

	case linux.SO_KEEPALIVE:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		ep.SocketOptions().SetKeepAlive(v != 0)
		return nil

	case linux.SO_SNDTIMEO:
		if len(optVal) < linux.SizeOfTimeval {
			return syserr.ErrInvalidArgument
		}

		var v linux.Timeval
		v.UnmarshalBytes(optVal)
		// The microsecond field must lie in [0, 1e6); otherwise ErrDomain.
		if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) {
			return syserr.ErrDomain
		}
		s.SetSendTimeout(v.ToNsecCapped())
		return nil

	case linux.SO_RCVTIMEO:
		if len(optVal) < linux.SizeOfTimeval {
			return syserr.ErrInvalidArgument
		}

		var v linux.Timeval
		v.UnmarshalBytes(optVal)
		// The microsecond field must lie in [0, 1e6); otherwise ErrDomain.
		if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) {
			return syserr.ErrDomain
		}
		s.SetRecvTimeout(v.ToNsecCapped())
		return nil

	case linux.SO_OOBINLINE:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		ep.SocketOptions().SetOutOfBandInline(v != 0)
		return nil

	case linux.SO_NO_CHECK:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		ep.SocketOptions().SetNoChecksum(v != 0)
		return nil

	case linux.SO_LINGER:
		if len(optVal) < linux.SizeOfLinger {
			return syserr.ErrInvalidArgument
		}

		var v linux.Linger
		v.UnmarshalBytes(optVal)

		// v.Linger is interpreted as a number of seconds.
		ep.SocketOptions().SetLinger(tcpip.LingerOption{
			Enabled: v.OnOff != 0,
			Timeout: time.Second * time.Duration(v.Linger),
		})
		return nil

	case linux.SO_DETACH_FILTER:
		// optval is ignored.
		var v tcpip.SocketDetachFilterOption
		return syserr.TranslateNetstackError(ep.SetSockOpt(&v))

	// TODO(b/226603727): Add support for SO_RCVLOWAT option. For now, only
	// the unsupported syscall message is removed.
	case linux.SO_RCVLOWAT:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		ep.SocketOptions().SetRcvlowat(int32(v))
		return nil
	}

	return nil
}

// setSockOptTCP implements SetSockOpt when level is SOL_TCP.
//
// It fails with ErrUnknownProtocolOption when s is not a TCP socket. Every
// option except TCP_CONGESTION decodes a 32-bit value from the start of optVal
// and fails with ErrInvalidArgument when optVal is shorter than sizeOfInt32.
// Option names that have no case below are accepted and ignored.
func setSockOptTCP(t *kernel.Task, s socket.Socket, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
	if !socket.IsTCP(s) {
		return syserr.ErrUnknownProtocolOption
	}

	switch name {
	case linux.TCP_NODELAY:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		// The stored option is "delay", i.e. the inverse of TCP_NODELAY.
		ep.SocketOptions().SetDelayOption(v == 0)
		return nil

	case linux.TCP_CORK:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		ep.SocketOptions().SetCorkOption(v != 0)
		return nil

	case linux.TCP_QUICKACK:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		ep.SocketOptions().SetQuickAck(v != 0)
		return nil

	case linux.TCP_MAXSEG:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.MaxSegOption, int(v)))

	case linux.TCP_KEEPIDLE:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		if v < 1 || v > linux.MAX_TCP_KEEPIDLE {
			return syserr.ErrInvalidArgument
		}
		// The value is a number of seconds.
		opt := tcpip.KeepaliveIdleOption(time.Second * time.Duration(v))
		return syserr.TranslateNetstackError(ep.SetSockOpt(&opt))

	case linux.TCP_KEEPINTVL:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		if v < 1 || v > linux.MAX_TCP_KEEPINTVL {
			return syserr.ErrInvalidArgument
		}
		// The value is a number of seconds.
		opt := tcpip.KeepaliveIntervalOption(time.Second * time.Duration(v))
		return syserr.TranslateNetstackError(ep.SetSockOpt(&opt))

	case linux.TCP_KEEPCNT:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		if v < 1 || v > linux.MAX_TCP_KEEPCNT {
			return syserr.ErrInvalidArgument
		}
		return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.KeepaliveCountOption, int(v)))

	case linux.TCP_USER_TIMEOUT:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		// Interpreted as a signed number of milliseconds; negative values are
		// rejected.
		v := int32(hostarch.ByteOrder.Uint32(optVal))
		if v < 0 {
			return syserr.ErrInvalidArgument
		}
		opt := tcpip.TCPUserTimeoutOption(time.Millisecond * time.Duration(v))
		return syserr.TranslateNetstackError(ep.SetSockOpt(&opt))

	case linux.TCP_CONGESTION:
		// The whole of optVal is passed through as the algorithm name; no
		// length check is performed here.
		v := tcpip.CongestionControlOption(optVal)
		if err := ep.SetSockOpt(&v); err != nil {
			return syserr.TranslateNetstackError(err)
		}
		return nil

	case linux.TCP_LINGER2:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		// Signed number of seconds; negative values are passed through
		// unchanged to the endpoint.
		v := int32(hostarch.ByteOrder.Uint32(optVal))
		opt := tcpip.TCPLingerTimeoutOption(time.Second * time.Duration(v))
		return syserr.TranslateNetstackError(ep.SetSockOpt(&opt))

	case linux.TCP_DEFER_ACCEPT:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}
		// Signed number of seconds; negative values are treated as zero.
		v := int32(hostarch.ByteOrder.Uint32(optVal))
		if v < 0 {
			v = 0
		}
		opt := tcpip.TCPDeferAcceptOption(time.Second * time.Duration(v))
		return syserr.TranslateNetstackError(ep.SetSockOpt(&opt))

	case linux.TCP_SYNCNT:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}
		v := hostarch.ByteOrder.Uint32(optVal)

		return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPSynCountOption, int(v)))

	case linux.TCP_WINDOW_CLAMP:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}
		v := hostarch.ByteOrder.Uint32(optVal)

		return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPWindowClampOption, int(v)))

	case linux.TCP_REPAIR_OPTIONS:
		// Not supported.
	}

	return nil
}

// setSockOptICMPv6 implements SetSockOpt when level is SOL_ICMPV6.
//
// It fails with ErrUnknownProtocolOption when ep is not a tcpip.Endpoint or
// when s is not an AF_INET6 socket. Only ICMPV6_FILTER is handled; any other
// option name is accepted and ignored.
func setSockOptICMPv6(t *kernel.Task, s socket.Socket, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
	if _, ok := ep.(tcpip.Endpoint); !ok {
		log.Warningf("SOL_ICMPV6 options not supported on endpoints other than tcpip.Endpoint: option = %d", name)
		return syserr.ErrUnknownProtocolOption
	}

	if family, _, _ := s.Type(); family != linux.AF_INET6 {
		return syserr.ErrUnknownProtocolOption
	}

	switch name {
	case linux.ICMPV6_FILTER:
		var req linux.ICMP6Filter
		if len(optVal) < req.SizeBytes() {
			return syserr.ErrInvalidArgument
		}

		req.UnmarshalUnsafe(optVal)
		return syserr.TranslateNetstackError(ep.SetSockOpt(&tcpip.ICMPv6Filter{DenyType: req.Filter}))
	}

	return nil
}

// setSockOptIPv6 implements SetSockOpt when level is SOL_IPV6.
//
// It fails with ErrUnknownProtocolOption when ep is not a tcpip.Endpoint or
// when s is not an AF_INET6 socket. Option names that are listed as
// unsupported, or that have no case at all, are accepted and ignored.
func setSockOptIPv6(t *kernel.Task, s socket.Socket, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
	if _, ok := ep.(tcpip.Endpoint); !ok {
		log.Warningf("SOL_IPV6 options not supported on endpoints other than tcpip.Endpoint: option = %d", name)
		return syserr.ErrUnknownProtocolOption
	}

	family, _, _ := s.Type()
	if family != linux.AF_INET6 {
		return syserr.ErrUnknownProtocolOption
	}

	switch name {
	case linux.IPV6_CHECKSUM:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		// int may not be 32-bits so we cast the uint32 to an int32 before casting
		// to an int.
		return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.IPv6Checksum, int(int32(hostarch.ByteOrder.Uint32(optVal)))))

	case linux.IPV6_V6ONLY:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}

		// The option may only be changed while a TCP or UDP endpoint is still
		// in its initial state.
		if socket.IsTCP(s) && tcp.EndpointState(ep.State()) != tcp.StateInitial {
			return syserr.ErrInvalidEndpointState
		} else if socket.IsUDP(s) && transport.DatagramEndpointState(ep.State()) != transport.DatagramEndpointStateInitial {
			return syserr.ErrInvalidEndpointState
		}

		v := hostarch.ByteOrder.Uint32(optVal)
		ep.SocketOptions().SetV6Only(v != 0)
		return nil

	case linux.IPV6_ADD_MEMBERSHIP:
		req, err := copyInMulticastV6Request(optVal)
		if err != nil {
			return err
		}

		return syserr.TranslateNetstackError(ep.SetSockOpt(&tcpip.AddMembershipOption{
			NIC:           tcpip.NICID(req.InterfaceIndex),
			MulticastAddr: tcpip.AddrFrom16(req.MulticastAddr),
		}))

	case linux.IPV6_DROP_MEMBERSHIP:
		req, err := copyInMulticastV6Request(optVal)
		if err != nil {
			return err
		}

		return syserr.TranslateNetstackError(ep.SetSockOpt(&tcpip.RemoveMembershipOption{
			NIC:           tcpip.NICID(req.InterfaceIndex),
			MulticastAddr: tcpip.AddrFrom16(req.MulticastAddr),
		}))

	case linux.IPV6_IPSEC_POLICY,
		linux.IPV6_JOIN_ANYCAST,
		linux.IPV6_LEAVE_ANYCAST,
		// TODO(b/148887420): Add support for IPV6_PKTINFO.
		linux.IPV6_PKTINFO,
		linux.IPV6_ROUTER_ALERT,
		linux.IPV6_XFRM_POLICY,
		linux.MCAST_BLOCK_SOURCE,
		linux.MCAST_JOIN_GROUP,
		linux.MCAST_JOIN_SOURCE_GROUP,
		linux.MCAST_LEAVE_GROUP,
		linux.MCAST_LEAVE_SOURCE_GROUP,
		linux.MCAST_UNBLOCK_SOURCE:
		// Not supported.

	case linux.IPV6_RECVORIGDSTADDR:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}
		v := int32(hostarch.ByteOrder.Uint32(optVal))

		ep.SocketOptions().SetReceiveOriginalDstAddress(v != 0)
		return nil

	case linux.IPV6_RECVPKTINFO:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}
		v := int32(hostarch.ByteOrder.Uint32(optVal))

		ep.SocketOptions().SetIPv6ReceivePacketInfo(v != 0)
		return nil

	case linux.IPV6_UNICAST_HOPS:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}
		// Accepted range is [-1, 255]; -1 is forwarded to the endpoint as is.
		v := int32(hostarch.ByteOrder.Uint32(optVal))
		if v < -1 || v > 255 {
			return syserr.ErrInvalidArgument
		}
		return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.IPv6HopLimitOption, int(v)))

	case linux.IPV6_RECVHOPLIMIT:
		v, err := parseIntOrChar(optVal)
		if err != nil {
			return err
		}

		ep.SocketOptions().SetReceiveHopLimit(v != 0)
		return nil

	case linux.IPV6_TCLASS:
		if len(optVal) < sizeOfInt32 {
			return syserr.ErrInvalidArgument
		}
		// Accepted range is [-1, 255]; -1 is stored as 0.
		v := int32(hostarch.ByteOrder.Uint32(optVal))
		if v < -1 || v > 255 {
			return syserr.ErrInvalidArgument
		}
		if v == -1 {
			v = 0
		}
		return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.IPv6TrafficClassOption, int(v)))

	case linux.IPV6_RECVTCLASS:
		v, err := parseIntOrChar(optVal)
		if err != nil {
			return err
		}

		ep.SocketOptions().SetReceiveTClass(v != 0)
		return nil
	case linux.IPV6_RECVERR:
		// An empty option value is silently accepted.
		if len(optVal) == 0 {
			return nil
		}
		v, err := parseIntOrChar(optVal)
		if err != nil {
			return err
		}
		ep.SocketOptions().SetIPv6RecvError(v != 0)
		return nil

	case linux.IP6T_SO_SET_REPLACE:
		if len(optVal) < linux.SizeOfIP6TReplace {
			return syserr.ErrInvalidArgument
		}

		// Only valid for raw IPv6 sockets.
		if !socket.IsRaw(s) {
			return syserr.ErrProtocolNotAvailable
		}

		stk := inet.StackFromContext(t)
		if stk == nil {
			return syserr.ErrNoDevice
		}
		// Stack must be a netstack stack.
		return netfilter.SetEntries(t, stk.(*Stack).Stack, optVal, true)

	case linux.IP6T_SO_SET_ADD_COUNTERS:
		log.Infof("IP6T_SO_SET_ADD_COUNTERS is not supported")
		return nil
	}

	return nil
}

// Encoded sizes of the multicast request structures, computed once via
// SizeBytes and used to tell the variants apart by option length.
var (
	inetMulticastRequestSize        = (*linux.InetMulticastRequest)(nil).SizeBytes()
	inetMulticastRequestWithNICSize = (*linux.InetMulticastRequestWithNIC)(nil).SizeBytes()
	inet6MulticastRequestSize       = (*linux.Inet6MulticastRequest)(nil).SizeBytes()
)

// copyInMulticastRequest copies in a variable-size multicast request. The
// kernel determines which structure was passed by its length. IP_MULTICAST_IF
// supports ip_mreqn, ip_mreq and in_addr, while IP_ADD_MEMBERSHIP and
// IP_DROP_MEMBERSHIP only support ip_mreqn and ip_mreq. To handle this,
// allowAddr controls whether in_addr is accepted or rejected.
2322 func copyInMulticastRequest(optVal []byte, allowAddr bool) (linux.InetMulticastRequestWithNIC, *syserr.Error) { 2323 if len(optVal) < len(linux.InetAddr{}) { 2324 return linux.InetMulticastRequestWithNIC{}, syserr.ErrInvalidArgument 2325 } 2326 2327 if len(optVal) < inetMulticastRequestSize { 2328 if !allowAddr { 2329 return linux.InetMulticastRequestWithNIC{}, syserr.ErrInvalidArgument 2330 } 2331 2332 var req linux.InetMulticastRequestWithNIC 2333 copy(req.InterfaceAddr[:], optVal) 2334 return req, nil 2335 } 2336 2337 if len(optVal) >= inetMulticastRequestWithNICSize { 2338 var req linux.InetMulticastRequestWithNIC 2339 req.UnmarshalUnsafe(optVal) 2340 return req, nil 2341 } 2342 2343 var req linux.InetMulticastRequestWithNIC 2344 req.InetMulticastRequest.UnmarshalUnsafe(optVal) 2345 return req, nil 2346 } 2347 2348 func copyInMulticastV6Request(optVal []byte) (linux.Inet6MulticastRequest, *syserr.Error) { 2349 if len(optVal) < inet6MulticastRequestSize { 2350 return linux.Inet6MulticastRequest{}, syserr.ErrInvalidArgument 2351 } 2352 2353 var req linux.Inet6MulticastRequest 2354 req.UnmarshalUnsafe(optVal) 2355 return req, nil 2356 } 2357 2358 // parseIntOrChar copies either a 32-bit int or an 8-bit uint out of buf. 2359 // 2360 // net/ipv4/ip_sockglue.c:do_ip_setsockopt does this for its socket options. 2361 func parseIntOrChar(buf []byte) (int32, *syserr.Error) { 2362 if len(buf) == 0 { 2363 return 0, syserr.ErrInvalidArgument 2364 } 2365 2366 if len(buf) >= sizeOfInt32 { 2367 return int32(hostarch.ByteOrder.Uint32(buf)), nil 2368 } 2369 2370 return int32(buf[0]), nil 2371 } 2372 2373 // setSockOptIP implements SetSockOpt when level is SOL_IP. 
//
// It fails with ErrUnknownProtocolOption when ep is not a tcpip.Endpoint.
// Most flag and small-integer options are decoded with parseIntOrChar, so they
// accept either a single byte or a 32-bit value. Option names that are listed
// as unsupported, or that have no case at all, are accepted and ignored.
func setSockOptIP(t *kernel.Task, s socket.Socket, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
	if _, ok := ep.(tcpip.Endpoint); !ok {
		log.Warningf("SOL_IP options not supported on endpoints other than tcpip.Endpoint: option = %d", name)
		return syserr.ErrUnknownProtocolOption
	}

	switch name {
	case linux.IP_MULTICAST_TTL:
		v, err := parseIntOrChar(optVal)
		if err != nil {
			return err
		}

		if v == -1 {
			// Linux translates -1 to 1.
			v = 1
		}
		if v < 0 || v > 255 {
			return syserr.ErrInvalidArgument
		}
		return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.MulticastTTLOption, int(v)))

	case linux.IP_ADD_MEMBERSHIP:
		req, err := copyInMulticastRequest(optVal, false /* allowAddr */)
		if err != nil {
			return err
		}

		return syserr.TranslateNetstackError(ep.SetSockOpt(&tcpip.AddMembershipOption{
			NIC: tcpip.NICID(req.InterfaceIndex),
			// TODO(igudger): Change AddMembership to use the standard
			// any address representation.
			InterfaceAddr: tcpip.AddrFrom4(req.InterfaceAddr),
			MulticastAddr: tcpip.AddrFrom4(req.MulticastAddr),
		}))

	case linux.IP_DROP_MEMBERSHIP:
		req, err := copyInMulticastRequest(optVal, false /* allowAddr */)
		if err != nil {
			return err
		}

		return syserr.TranslateNetstackError(ep.SetSockOpt(&tcpip.RemoveMembershipOption{
			NIC: tcpip.NICID(req.InterfaceIndex),
			// TODO(igudger): Change DropMembership to use the standard
			// any address representation.
			InterfaceAddr: tcpip.AddrFrom4(req.InterfaceAddr),
			MulticastAddr: tcpip.AddrFrom4(req.MulticastAddr),
		}))

	case linux.IP_MULTICAST_IF:
		// Unlike the membership options, a bare in_addr is accepted here.
		req, err := copyInMulticastRequest(optVal, true /* allowAddr */)
		if err != nil {
			return err
		}

		return syserr.TranslateNetstackError(ep.SetSockOpt(&tcpip.MulticastInterfaceOption{
			NIC:           tcpip.NICID(req.InterfaceIndex),
			InterfaceAddr: socket.BytesToIPAddress(req.InterfaceAddr[:]),
		}))

	case linux.IP_MULTICAST_LOOP:
		v, err := parseIntOrChar(optVal)
		if err != nil {
			return err
		}

		ep.SocketOptions().SetMulticastLoop(v != 0)
		return nil

	case linux.MCAST_JOIN_GROUP:
		// FIXME(b/124219304): Implement MCAST_JOIN_GROUP.
		return syserr.ErrInvalidArgument

	case linux.IP_TTL:
		v, err := parseIntOrChar(optVal)
		if err != nil {
			return err
		}

		// -1 means default TTL, which is stored as 0. Any other value must
		// lie in [1, 255].
		if v == -1 {
			v = 0
		} else if v < 1 || v > 255 {
			return syserr.ErrInvalidArgument
		}
		return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.IPv4TTLOption, int(v)))

	case linux.IP_RECVTTL:
		v, err := parseIntOrChar(optVal)
		if err != nil {
			return err
		}
		ep.SocketOptions().SetReceiveTTL(v != 0)
		return nil

	case linux.IP_TOS:
		// An empty option value is silently accepted.
		if len(optVal) == 0 {
			return nil
		}
		v, err := parseIntOrChar(optVal)
		if err != nil {
			return err
		}
		return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.IPv4TOSOption, int(v)))

	case linux.IP_RECVTOS:
		v, err := parseIntOrChar(optVal)
		if err != nil {
			return err
		}
		ep.SocketOptions().SetReceiveTOS(v != 0)
		return nil

	case linux.IP_RECVERR:
		// An empty option value is silently accepted.
		if len(optVal) == 0 {
			return nil
		}
		v, err := parseIntOrChar(optVal)
		if err != nil {
			return err
		}
		ep.SocketOptions().SetIPv4RecvError(v != 0)
		return nil

	case linux.IP_PKTINFO:
		// An empty option value is silently accepted.
		if len(optVal) == 0 {
			return nil
		}
		v, err := parseIntOrChar(optVal)
		if err != nil {
			return err
		}
		ep.SocketOptions().SetReceivePacketInfo(v != 0)
		return nil

	case linux.IP_HDRINCL:
		// An empty option value is silently accepted.
		if len(optVal) == 0 {
			return nil
		}
		v, err := parseIntOrChar(optVal)
		if err != nil {
			return err
		}
		ep.SocketOptions().SetHeaderIncluded(v != 0)
		return nil

	case linux.IP_RECVORIGDSTADDR:
		// An empty option value is silently accepted.
		if len(optVal) == 0 {
			return nil
		}
		v, err := parseIntOrChar(optVal)
		if err != nil {
			return err
		}

		ep.SocketOptions().SetReceiveOriginalDstAddress(v != 0)
		return nil

	case linux.IPT_SO_SET_REPLACE:
		if len(optVal) < linux.SizeOfIPTReplace {
			return syserr.ErrInvalidArgument
		}

		// Only valid for raw IPv4 sockets.
		if family, skType, _ := s.Type(); family != linux.AF_INET || skType != linux.SOCK_RAW {
			return syserr.ErrProtocolNotAvailable
		}

		stk := inet.StackFromContext(t)
		if stk == nil {
			return syserr.ErrNoDevice
		}
		// Stack must be a netstack stack.
		return netfilter.SetEntries(t, stk.(*Stack).Stack, optVal, false)

	case linux.IPT_SO_SET_ADD_COUNTERS:
		log.Infof("IPT_SO_SET_ADD_COUNTERS is not supported")
		return nil

	case linux.IP_ADD_SOURCE_MEMBERSHIP,
		linux.IP_BIND_ADDRESS_NO_PORT,
		linux.IP_BLOCK_SOURCE,
		linux.IP_CHECKSUM,
		linux.IP_DROP_SOURCE_MEMBERSHIP,
		linux.IP_FREEBIND,
		linux.IP_IPSEC_POLICY,
		linux.IP_MINTTL,
		linux.IP_MSFILTER,
		linux.IP_MTU_DISCOVER,
		linux.IP_MULTICAST_ALL,
		linux.IP_NODEFRAG,
		linux.IP_OPTIONS,
		linux.IP_PASSSEC,
		linux.IP_RECVFRAGSIZE,
		linux.IP_RECVOPTS,
		linux.IP_RETOPTS,
		linux.IP_TRANSPARENT,
		linux.IP_UNBLOCK_SOURCE,
		linux.IP_UNICAST_IF,
		linux.IP_XFRM_POLICY,
		linux.MCAST_BLOCK_SOURCE,
		linux.MCAST_JOIN_SOURCE_GROUP,
		linux.MCAST_LEAVE_GROUP,
		linux.MCAST_LEAVE_SOURCE_GROUP,
		linux.MCAST_MSFILTER,
		linux.MCAST_UNBLOCK_SOURCE:
		// Not supported.
	}

	return nil
}

// GetSockName implements the linux syscall getsockname(2) for sockets backed by
// tcpip.Endpoint.
//
// It returns the endpoint's local address converted for the socket's address
// family, together with the length reported by socket.ConvertAddress.
func (s *sock) GetSockName(*kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
	addr, err := s.Endpoint.GetLocalAddress()
	if err != nil {
		return nil, 0, syserr.TranslateNetstackError(err)
	}

	a, l := socket.ConvertAddress(s.family, addr)
	return a, l, nil
}

// GetPeerName implements the linux syscall getpeername(2) for sockets backed by
// tcpip.Endpoint.
//
// It returns the endpoint's remote address converted for the socket's address
// family, together with the length reported by socket.ConvertAddress.
func (s *sock) GetPeerName(*kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
	addr, err := s.Endpoint.GetRemoteAddress()
	if err != nil {
		return nil, 0, syserr.TranslateNetstackError(err)
	}

	a, l := socket.ConvertAddress(s.family, addr)
	return a, l, nil
}

// fillCmsgInq records the endpoint's current receive queue size in cmsg when
// s.sockOptInq is set. If the option is off, or the queue size cannot be
// read, cmsg is left unmodified.
func (s *sock) fillCmsgInq(cmsg *socket.ControlMessages) {
	if !s.sockOptInq {
		return
	}
	rcvBufUsed, err := s.Endpoint.GetSockOptInt(tcpip.ReceiveQueueSizeOption)
	if err != nil {
		return
	}
	cmsg.IP.HasInq = true
	cmsg.IP.Inq = int32(rcvBufUsed)
}

// toLinuxPacketType maps a tcpip.PacketType to the matching linux.PACKET_*
// constant. It panics when pktType is not one of the known packet types.
func toLinuxPacketType(pktType tcpip.PacketType) uint8 {
	switch pktType {
	case tcpip.PacketHost:
		return linux.PACKET_HOST
	case tcpip.PacketOtherHost:
		return linux.PACKET_OTHERHOST
	case tcpip.PacketOutgoing:
		return linux.PACKET_OUTGOING
	case tcpip.PacketBroadcast:
		return linux.PACKET_BROADCAST
	case tcpip.PacketMulticast:
		return linux.PACKET_MULTICAST
	default:
		panic(fmt.Sprintf("unknown packet type: %d", pktType))
	}
}

// nonBlockingRead issues a non-blocking read.
//
// The results are, in order: the message length, the MSG_* flags to report,
// the sender address and its length (only populated for packet-based sockets
// when senderRequested is set), the control messages, and an error.
//
// TODO(b/78348848): Support timestamps for stream sockets.
func (s *sock) nonBlockingRead(ctx context.Context, dst usermem.IOSequence, peek, trunc, senderRequested bool) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) {
	isPacket := s.isPacketBased()

	readOptions := tcpip.ReadOptions{
		Peek:               peek,
		NeedRemoteAddr:     senderRequested,
		NeedLinkPacketInfo: isPacket,
	}

	// TCP sockets discard the data if MSG_TRUNC is set.
	//
	// This behavior is documented in man 7 tcp:
	// Since version 2.4, Linux supports the use of MSG_TRUNC in the flags
	// argument of recv(2) (and recvmsg(2)). This flag causes the received
	// bytes of data to be discarded, rather than passed back in a
	// caller-supplied buffer.
	var w io.Writer
	if !isPacket && trunc {
		// Discard at most as many bytes as the destination could have held.
		w = &tcpip.LimitedWriter{
			W: ioutil.Discard,
			N: dst.NumBytes(),
		}
	} else {
		w = dst.Writer(ctx)
	}

	s.readMu.Lock()
	defer s.readMu.Unlock()

	res, err := s.Endpoint.Read(w, readOptions)
	// A bad-buffer error is not reported when the destination is empty.
	if _, ok := err.(*tcpip.ErrBadBuffer); ok && dst.NumBytes() == 0 {
		err = nil
	}
	if err != nil {
		return 0, 0, nil, 0, socket.ControlMessages{}, syserr.TranslateNetstackError(err)
	}
	// Set the control message, even if 0 bytes were read.
	s.updateTimestamp(res.ControlMessages)

	if isPacket {
		var addr linux.SockAddr
		var addrLen uint32
		if senderRequested {
			addr, addrLen = socket.ConvertAddress(s.family, res.RemoteAddr)
			// Link-layer addresses additionally carry the protocol and packet
			// type of the received packet.
			switch v := addr.(type) {
			case *linux.SockAddrLink:
				v.Protocol = socket.Htons(uint16(res.LinkPacketInfo.Protocol))
				v.PacketType = toLinuxPacketType(res.LinkPacketInfo.PktType)
			}
		}

		// With MSG_TRUNC the full message size is reported rather than the
		// number of bytes copied.
		msgLen := res.Count
		if trunc {
			msgLen = res.Total
		}

		// Signal truncation when the message was larger than what was copied.
		var flags int
		if res.Total > res.Count {
			flags |= linux.MSG_TRUNC
		}

		return msgLen, flags, addr, addrLen, s.netstackToLinuxControlMessages(res.ControlMessages), nil
	}

	if peek {
		// MSG_TRUNC with MSG_PEEK on a TCP socket returns the
		// amount that could be read, and does not write to buffer.
		if trunc {
			// TCP endpoint does not return the total bytes in buffer as numTotal.
			// We need to query it from socket option.
			rql, err := s.Endpoint.GetSockOptInt(tcpip.ReceiveQueueSizeOption)
			if err != nil {
				return 0, 0, nil, 0, socket.ControlMessages{}, syserr.TranslateNetstackError(err)
			}
			// Report the smaller of the buffer size and the queued amount.
			msgLen := int(dst.NumBytes())
			if msgLen > rql {
				msgLen = rql
			}
			return msgLen, 0, nil, 0, socket.ControlMessages{}, nil
		}
	} else if n := res.Count; n != 0 {
		// Data was actually consumed (not peeked): let the endpoint know how
		// much was read.
		s.Endpoint.ModerateRecvBuf(n)
	}

	cmsg := s.netstackToLinuxControlMessages(res.ControlMessages)
	s.fillCmsgInq(&cmsg)
	return res.Count, 0, nil, 0, cmsg, syserr.TranslateNetstackError(err)
}

// netstackToLinuxControlMessages converts control messages received from the
// endpoint into socket.ControlMessages via socket.NewIPControlMessages. The
// timestamp is only reported when s.sockOptTimestamp is set.
func (s *sock) netstackToLinuxControlMessages(cm tcpip.ReceivableControlMessages) socket.ControlMessages {
	readCM := socket.NewIPControlMessages(s.family, cm)
	return socket.ControlMessages{
		IP: socket.IPControlMessages{
			HasTimestamp:       readCM.HasTimestamp && s.sockOptTimestamp,
			Timestamp:          readCM.Timestamp,
			HasInq:             readCM.HasInq,
			Inq:                readCM.Inq,
			HasTOS:             readCM.HasTOS,
			TOS:                readCM.TOS,
			HasTClass:          readCM.HasTClass,
			TClass:             readCM.TClass,
			HasTTL:             readCM.HasTTL,
			TTL:                readCM.TTL,
			HasHopLimit:        readCM.HasHopLimit,
			HopLimit:           readCM.HopLimit,
			HasIPPacketInfo:    readCM.HasIPPacketInfo,
			PacketInfo:         readCM.PacketInfo,
			HasIPv6PacketInfo:  readCM.HasIPv6PacketInfo,
			IPv6PacketInfo:     readCM.IPv6PacketInfo,
			OriginalDstAddress: readCM.OriginalDstAddress,
			SockErr:            readCM.SockErr,
		},
	}
}

// linuxToNetstackControlMessages converts control messages supplied on a send
// into tcpip.SendableControlMessages. Only the TTL and hop limit are carried
// over, each converted to uint8.
func (s *sock) linuxToNetstackControlMessages(cm socket.ControlMessages) tcpip.SendableControlMessages {
	return tcpip.SendableControlMessages{
		HasTTL:      cm.IP.HasTTL,
		TTL:         uint8(cm.IP.TTL),
		HasHopLimit: cm.IP.HasHopLimit,
		HopLimit:    uint8(cm.IP.HopLimit),
	}
}

// updateTimestamp sets the timestamp for SIOCGSTAMP. It should be called after
// successfully writing packet data out to userspace.
2769 // 2770 // Precondition: s.readMu must be locked. 2771 func (s *sock) updateTimestamp(cm tcpip.ReceivableControlMessages) { 2772 // Save the SIOCGSTAMP timestamp only if SO_TIMESTAMP is disabled. 2773 if !s.sockOptTimestamp { 2774 s.timestampValid = true 2775 s.timestamp = cm.Timestamp 2776 } 2777 } 2778 2779 // dequeueErr is analogous to net/core/skbuff.c:sock_dequeue_err_skb(). 2780 func (s *sock) dequeueErr() *tcpip.SockError { 2781 so := s.Endpoint.SocketOptions() 2782 err := so.DequeueErr() 2783 if err == nil { 2784 return nil 2785 } 2786 2787 // Update socket error to reflect ICMP errors in queue. 2788 if nextErr := so.PeekErr(); nextErr != nil && nextErr.Cause.Origin().IsICMPErr() { 2789 so.SetLastError(nextErr.Err) 2790 } else if err.Cause.Origin().IsICMPErr() { 2791 so.SetLastError(nil) 2792 } 2793 return err 2794 } 2795 2796 // addrFamilyFromNetProto returns the address family identifier for the given 2797 // network protocol. 2798 func addrFamilyFromNetProto(net tcpip.NetworkProtocolNumber) int { 2799 switch net { 2800 case header.IPv4ProtocolNumber: 2801 return linux.AF_INET 2802 case header.IPv6ProtocolNumber: 2803 return linux.AF_INET6 2804 default: 2805 panic(fmt.Sprintf("invalid net proto for addr family inference: %d", net)) 2806 } 2807 } 2808 2809 // recvErr handles MSG_ERRQUEUE for recvmsg(2). 2810 // This is analogous to net/ipv4/ip_sockglue.c:ip_recv_error(). 2811 func (s *sock) recvErr(t *kernel.Task, dst usermem.IOSequence) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) { 2812 sockErr := s.dequeueErr() 2813 if sockErr == nil { 2814 return 0, 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain 2815 } 2816 if sockErr.Payload != nil { 2817 defer sockErr.Payload.Release() 2818 } 2819 2820 // The payload of the original packet that caused the error is passed as 2821 // normal data via msg_iovec. 
-- recvmsg(2) 2822 msgFlags := linux.MSG_ERRQUEUE 2823 if int(dst.NumBytes()) < sockErr.Payload.Size() { 2824 msgFlags |= linux.MSG_TRUNC 2825 } 2826 n, err := dst.CopyOut(t, sockErr.Payload.AsSlice()) 2827 2828 // The original destination address of the datagram that caused the error is 2829 // supplied via msg_name. -- recvmsg(2) 2830 dstAddr, dstAddrLen := socket.ConvertAddress(addrFamilyFromNetProto(sockErr.NetProto), sockErr.Dst) 2831 cmgs := socket.ControlMessages{IP: socket.NewIPControlMessages(s.family, tcpip.ReceivableControlMessages{SockErr: sockErr})} 2832 return n, msgFlags, dstAddr, dstAddrLen, cmgs, syserr.FromError(err) 2833 } 2834 2835 // RecvMsg implements the linux syscall recvmsg(2) for sockets backed by 2836 // tcpip.Endpoint. 2837 func (s *sock) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, _ uint64) (n int, msgFlags int, senderAddr linux.SockAddr, senderAddrLen uint32, controlMessages socket.ControlMessages, err *syserr.Error) { 2838 if flags&linux.MSG_ERRQUEUE != 0 { 2839 return s.recvErr(t, dst) 2840 } 2841 2842 trunc := flags&linux.MSG_TRUNC != 0 2843 peek := flags&linux.MSG_PEEK != 0 2844 dontWait := flags&linux.MSG_DONTWAIT != 0 2845 waitAll := flags&linux.MSG_WAITALL != 0 2846 if senderRequested && !s.isPacketBased() { 2847 // Stream sockets ignore the sender address. 2848 senderRequested = false 2849 } 2850 n, msgFlags, senderAddr, senderAddrLen, controlMessages, err = s.nonBlockingRead(t, dst, peek, trunc, senderRequested) 2851 2852 if s.isPacketBased() && err == syserr.ErrClosedForReceive && flags&linux.MSG_DONTWAIT != 0 { 2853 // In this situation we should return EAGAIN. 2854 return 0, 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain 2855 } 2856 2857 if err != nil && (err != syserr.ErrWouldBlock || dontWait) { 2858 // Read failed and we should not retry. 
2859 return 0, 0, nil, 0, socket.ControlMessages{}, err 2860 } 2861 2862 if err == nil && (dontWait || !waitAll || s.isPacketBased() || int64(n) >= dst.NumBytes()) { 2863 // We got all the data we need. 2864 return 2865 } 2866 2867 // Don't overwrite any data we received. 2868 dst = dst.DropFirst(n) 2869 2870 // We'll have to block. Register for notifications and keep trying to 2871 // send all the data. 2872 e, ch := waiter.NewChannelEntry(waiter.ReadableEvents) 2873 s.EventRegister(&e) 2874 defer s.EventUnregister(&e) 2875 2876 for { 2877 var rn int 2878 rn, msgFlags, senderAddr, senderAddrLen, controlMessages, err = s.nonBlockingRead(t, dst, peek, trunc, senderRequested) 2879 n += rn 2880 if err != nil && err != syserr.ErrWouldBlock { 2881 // Always stop on errors other than would block as we generally 2882 // won't be able to get any more data. Eat the error if we got 2883 // any data. 2884 if n > 0 { 2885 err = nil 2886 } 2887 return 2888 } 2889 if err == nil && (s.isPacketBased() || !waitAll || int64(rn) >= dst.NumBytes()) { 2890 // We got all the data we need. 2891 return 2892 } 2893 dst = dst.DropFirst(rn) 2894 2895 if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { 2896 if n > 0 { 2897 return n, msgFlags, senderAddr, senderAddrLen, controlMessages, nil 2898 } 2899 if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { 2900 return 0, 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain 2901 } 2902 return 0, 0, nil, 0, socket.ControlMessages{}, syserr.FromError(err) 2903 } 2904 } 2905 } 2906 2907 // SendMsg implements the linux syscall sendmsg(2) for sockets backed by 2908 // tcpip.Endpoint. 2909 func (s *sock) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) { 2910 // Reject Unix control messages. 
2911 if !controlMessages.Unix.Empty() { 2912 return 0, syserr.ErrInvalidArgument 2913 } 2914 2915 var addr *tcpip.FullAddress 2916 if len(to) > 0 { 2917 addrBuf, family, err := socket.AddressAndFamily(to) 2918 if err != nil { 2919 return 0, err 2920 } 2921 if !s.checkFamily(family, false /* exact */) { 2922 return 0, syserr.ErrInvalidArgument 2923 } 2924 addrBuf = s.mapFamily(addrBuf, family) 2925 2926 addr = &addrBuf 2927 } 2928 2929 opts := tcpip.WriteOptions{ 2930 To: addr, 2931 More: flags&linux.MSG_MORE != 0, 2932 EndOfRecord: flags&linux.MSG_EOR != 0, 2933 ControlMessages: s.linuxToNetstackControlMessages(controlMessages), 2934 } 2935 2936 r := src.Reader(t) 2937 var ( 2938 total int64 2939 entry waiter.Entry 2940 ch <-chan struct{} 2941 ) 2942 for { 2943 n, err := s.Endpoint.Write(r, opts) 2944 total += n 2945 if flags&linux.MSG_DONTWAIT != 0 { 2946 return int(total), syserr.TranslateNetstackError(err) 2947 } 2948 block := true 2949 switch err.(type) { 2950 case nil: 2951 block = total != src.NumBytes() 2952 case *tcpip.ErrWouldBlock: 2953 default: 2954 block = false 2955 } 2956 if block { 2957 if ch == nil { 2958 // We'll have to block. Register for notification and keep trying to 2959 // send all the data. 2960 entry, ch = waiter.NewChannelEntry(waiter.WritableEvents) 2961 s.EventRegister(&entry) 2962 defer s.EventUnregister(&entry) 2963 } else { 2964 // Don't wait immediately after registration in case more data 2965 // became available between when we last checked and when we setup 2966 // the notification. 2967 if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { 2968 if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { 2969 return int(total), syserr.ErrTryAgain 2970 } 2971 // handleIOError will consume errors from t.Block if needed. 2972 return int(total), syserr.FromError(err) 2973 } 2974 } 2975 continue 2976 } 2977 return int(total), syserr.TranslateNetstackError(err) 2978 } 2979 } 2980 2981 // Ioctl implements vfs.FileDescriptionImpl. 
2982 func (s *sock) Ioctl(ctx context.Context, uio usermem.IO, sysno uintptr, args arch.SyscallArguments) (uintptr, error) { 2983 t := kernel.TaskFromContext(ctx) 2984 if t == nil { 2985 panic("ioctl(2) may only be called from a task goroutine") 2986 } 2987 2988 // SIOCGSTAMP is implemented by netstack rather than all commonEndpoint 2989 // sockets. 2990 // TODO(b/78348848): Add a commonEndpoint method to support SIOCGSTAMP. 2991 switch args[1].Int() { 2992 case linux.SIOCGSTAMP: 2993 s.readMu.Lock() 2994 defer s.readMu.Unlock() 2995 if !s.timestampValid { 2996 return 0, linuxerr.ENOENT 2997 } 2998 2999 tv := linux.NsecToTimeval(s.timestamp.UnixNano()) 3000 _, err := tv.CopyOut(t, args[2].Pointer()) 3001 return 0, err 3002 3003 case linux.TIOCINQ: 3004 v, terr := s.Endpoint.GetSockOptInt(tcpip.ReceiveQueueSizeOption) 3005 if terr != nil { 3006 return 0, syserr.TranslateNetstackError(terr).ToError() 3007 } 3008 3009 if v > math.MaxInt32 { 3010 v = math.MaxInt32 3011 } 3012 3013 // Copy result to userspace. 3014 vP := primitive.Int32(v) 3015 _, err := vP.CopyOut(t, args[2].Pointer()) 3016 return 0, err 3017 } 3018 3019 return Ioctl(ctx, s.Endpoint, uio, sysno, args) 3020 } 3021 3022 // Ioctl performs a socket ioctl. 
3023 func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, sysno uintptr, args arch.SyscallArguments) (uintptr, error) { 3024 t := kernel.TaskFromContext(ctx) 3025 if t == nil { 3026 panic("ioctl(2) may only be called from a task goroutine") 3027 } 3028 3029 switch arg := int(args[1].Int()); arg { 3030 case linux.SIOCGIFFLAGS, 3031 linux.SIOCGIFADDR, 3032 linux.SIOCGIFBRDADDR, 3033 linux.SIOCGIFDSTADDR, 3034 linux.SIOCGIFHWADDR, 3035 linux.SIOCGIFINDEX, 3036 linux.SIOCGIFMAP, 3037 linux.SIOCGIFMETRIC, 3038 linux.SIOCGIFMTU, 3039 linux.SIOCGIFNAME, 3040 linux.SIOCGIFNETMASK, 3041 linux.SIOCGIFTXQLEN, 3042 linux.SIOCETHTOOL: 3043 3044 var ifr linux.IFReq 3045 if _, err := ifr.CopyIn(t, args[2].Pointer()); err != nil { 3046 return 0, err 3047 } 3048 if err := interfaceIoctl(ctx, io, arg, &ifr); err != nil { 3049 return 0, err.ToError() 3050 } 3051 _, err := ifr.CopyOut(t, args[2].Pointer()) 3052 return 0, err 3053 3054 case linux.SIOCGIFCONF: 3055 // Return a list of interface addresses or the buffer size 3056 // necessary to hold the list. 3057 var ifc linux.IFConf 3058 if _, err := ifc.CopyIn(t, args[2].Pointer()); err != nil { 3059 return 0, err 3060 } 3061 3062 if err := ifconfIoctl(ctx, t, io, &ifc); err != nil { 3063 return 0, err 3064 } 3065 3066 _, err := ifc.CopyOut(t, args[2].Pointer()) 3067 return 0, err 3068 3069 case linux.TIOCINQ: 3070 v, terr := ep.GetSockOptInt(tcpip.ReceiveQueueSizeOption) 3071 if terr != nil { 3072 return 0, syserr.TranslateNetstackError(terr).ToError() 3073 } 3074 3075 if v > math.MaxInt32 { 3076 v = math.MaxInt32 3077 } 3078 // Copy result to userspace. 
3079 vP := primitive.Int32(v) 3080 _, err := vP.CopyOut(t, args[2].Pointer()) 3081 return 0, err 3082 3083 case linux.TIOCOUTQ: 3084 v, terr := ep.GetSockOptInt(tcpip.SendQueueSizeOption) 3085 if terr != nil { 3086 return 0, syserr.TranslateNetstackError(terr).ToError() 3087 } 3088 3089 if v > math.MaxInt32 { 3090 v = math.MaxInt32 3091 } 3092 3093 // Copy result to userspace. 3094 vP := primitive.Int32(v) 3095 _, err := vP.CopyOut(t, args[2].Pointer()) 3096 return 0, err 3097 3098 case linux.SIOCGIFMEM, linux.SIOCGIFPFLAGS, linux.SIOCGMIIPHY, linux.SIOCGMIIREG: 3099 // Not supported. 3100 } 3101 3102 return 0, linuxerr.ENOTTY 3103 } 3104 3105 // interfaceIoctl implements interface requests. 3106 func interfaceIoctl(ctx context.Context, _ usermem.IO, arg int, ifr *linux.IFReq) *syserr.Error { 3107 var ( 3108 iface inet.Interface 3109 index int32 3110 found bool 3111 ) 3112 3113 // Find the relevant device. 3114 stk := inet.StackFromContext(ctx) 3115 if stk == nil { 3116 return syserr.ErrNoDevice 3117 } 3118 3119 // SIOCGIFNAME uses ifr.ifr_ifindex rather than ifr.ifr_name to 3120 // identify a device. 3121 if arg == linux.SIOCGIFNAME { 3122 // Gets the name of the interface given the interface index 3123 // stored in ifr_ifindex. 3124 index = int32(hostarch.ByteOrder.Uint32(ifr.Data[:4])) 3125 if iface, ok := stk.Interfaces()[index]; ok { 3126 ifr.SetName(iface.Name) 3127 return nil 3128 } 3129 return syserr.ErrNoDevice 3130 } 3131 3132 // Find the relevant device. 3133 for index, iface = range stk.Interfaces() { 3134 if iface.Name == ifr.Name() { 3135 found = true 3136 break 3137 } 3138 } 3139 if !found { 3140 return syserr.ErrNoDevice 3141 } 3142 3143 switch arg { 3144 case linux.SIOCGIFINDEX: 3145 // Copy out the index to the data. 3146 hostarch.ByteOrder.PutUint32(ifr.Data[:], uint32(index)) 3147 3148 case linux.SIOCGIFHWADDR: 3149 // Copy the hardware address out. 
3150 // 3151 // Refer: https://linux.die.net/man/7/netdevice 3152 // SIOCGIFHWADDR, SIOCSIFHWADDR 3153 // 3154 // Get or set the hardware address of a device using 3155 // ifr_hwaddr. The hardware address is specified in a struct 3156 // sockaddr. sa_family contains the ARPHRD_* device type, 3157 // sa_data the L2 hardware address starting from byte 0. Setting 3158 // the hardware address is a privileged operation. 3159 hostarch.ByteOrder.PutUint16(ifr.Data[:], iface.DeviceType) 3160 n := copy(ifr.Data[2:], iface.Addr) 3161 for i := 2 + n; i < len(ifr.Data); i++ { 3162 ifr.Data[i] = 0 // Clear padding. 3163 } 3164 3165 case linux.SIOCGIFFLAGS: 3166 f, err := interfaceStatusFlags(stk, iface.Name) 3167 if err != nil { 3168 return err 3169 } 3170 // Drop the flags that don't fit in the size that we need to return. This 3171 // matches Linux behavior. 3172 hostarch.ByteOrder.PutUint16(ifr.Data[:2], uint16(f)) 3173 3174 case linux.SIOCGIFADDR: 3175 // Copy the IPv4 address out. 3176 for _, addr := range stk.InterfaceAddrs()[index] { 3177 // This ioctl is only compatible with AF_INET addresses. 3178 if addr.Family != linux.AF_INET { 3179 continue 3180 } 3181 copy(ifr.Data[4:8], addr.Addr) 3182 break 3183 } 3184 3185 case linux.SIOCGIFMETRIC: 3186 // Gets the metric of the device. As per netdevice(7), this 3187 // always just sets ifr_metric to 0. 3188 hostarch.ByteOrder.PutUint32(ifr.Data[:4], 0) 3189 3190 case linux.SIOCGIFMTU: 3191 // Gets the MTU of the device. 3192 hostarch.ByteOrder.PutUint32(ifr.Data[:4], iface.MTU) 3193 3194 case linux.SIOCGIFMAP: 3195 // Gets the hardware parameters of the device. 3196 // TODO(gvisor.dev/issue/505): Implement. 3197 3198 case linux.SIOCGIFTXQLEN: 3199 // Gets the transmit queue length of the device. 3200 // TODO(gvisor.dev/issue/505): Implement. 3201 3202 case linux.SIOCGIFDSTADDR: 3203 // Gets the destination address of a point-to-point device. 3204 // TODO(gvisor.dev/issue/505): Implement. 
3205 3206 case linux.SIOCGIFBRDADDR: 3207 // Gets the broadcast address of a device. 3208 // TODO(gvisor.dev/issue/505): Implement. 3209 3210 case linux.SIOCGIFNETMASK: 3211 // Gets the network mask of a device. 3212 for _, addr := range stk.InterfaceAddrs()[index] { 3213 // This ioctl is only compatible with AF_INET addresses. 3214 if addr.Family != linux.AF_INET { 3215 continue 3216 } 3217 // Populate ifr.ifr_netmask (type sockaddr). 3218 hostarch.ByteOrder.PutUint16(ifr.Data[0:], uint16(linux.AF_INET)) 3219 hostarch.ByteOrder.PutUint16(ifr.Data[2:], 0) 3220 var mask uint32 = 0xffffffff << (32 - addr.PrefixLen) 3221 // Netmask is expected to be returned as a big endian 3222 // value. 3223 binary.BigEndian.PutUint32(ifr.Data[4:8], mask) 3224 break 3225 } 3226 3227 case linux.SIOCETHTOOL: 3228 // Stubbed out for now, Ideally we should implement the required 3229 // sub-commands for ETHTOOL 3230 // 3231 // See: 3232 // https://github.com/torvalds/linux/blob/aa0c9086b40c17a7ad94425b3b70dd1fdd7497bf/net/core/dev_ioctl.c 3233 return syserr.ErrEndpointOperation 3234 3235 default: 3236 // Not a valid call. 3237 return syserr.ErrInvalidArgument 3238 } 3239 3240 return nil 3241 } 3242 3243 // ifconfIoctl populates a struct ifconf for the SIOCGIFCONF ioctl. 3244 func ifconfIoctl(ctx context.Context, t *kernel.Task, _ usermem.IO, ifc *linux.IFConf) error { 3245 // If Ptr is NULL, return the necessary buffer size via Len. 3246 // Otherwise, write up to Len bytes starting at Ptr containing ifreq 3247 // structs. 3248 stk := inet.StackFromContext(ctx) 3249 if stk == nil { 3250 return syserr.ErrNoDevice.ToError() 3251 } 3252 3253 if ifc.Ptr == 0 { 3254 ifc.Len = int32(len(stk.Interfaces())) * int32(linux.SizeOfIFReq) 3255 return nil 3256 } 3257 3258 max := ifc.Len 3259 ifc.Len = 0 3260 for key, ifaceAddrs := range stk.InterfaceAddrs() { 3261 iface := stk.Interfaces()[key] 3262 for _, ifaceAddr := range ifaceAddrs { 3263 // Don't write past the end of the buffer. 
3264 if ifc.Len+int32(linux.SizeOfIFReq) > max { 3265 break 3266 } 3267 if ifaceAddr.Family != linux.AF_INET { 3268 continue 3269 } 3270 3271 // Populate ifr.ifr_addr. 3272 ifr := linux.IFReq{} 3273 ifr.SetName(iface.Name) 3274 hostarch.ByteOrder.PutUint16(ifr.Data[0:2], uint16(ifaceAddr.Family)) 3275 hostarch.ByteOrder.PutUint16(ifr.Data[2:4], 0) 3276 copy(ifr.Data[4:8], ifaceAddr.Addr[:4]) 3277 3278 // Copy the ifr to userspace. 3279 dst := uintptr(ifc.Ptr) + uintptr(ifc.Len) 3280 ifc.Len += int32(linux.SizeOfIFReq) 3281 if _, err := ifr.CopyOut(t, hostarch.Addr(dst)); err != nil { 3282 return err 3283 } 3284 } 3285 } 3286 return nil 3287 } 3288 3289 // interfaceStatusFlags returns status flags for an interface in the stack. 3290 // Flag values and meanings are described in greater detail in netdevice(7) in 3291 // the SIOCGIFFLAGS section. 3292 func interfaceStatusFlags(stack inet.Stack, name string) (uint32, *syserr.Error) { 3293 // We should only ever be passed a netstack.Stack. 3294 epstack, ok := stack.(*Stack) 3295 if !ok { 3296 return 0, errStackType 3297 } 3298 3299 // Find the NIC corresponding to this interface. 3300 for _, info := range epstack.Stack.NICInfo() { 3301 if info.Name == name { 3302 return nicStateFlagsToLinux(info.Flags), nil 3303 } 3304 } 3305 return 0, syserr.ErrNoDevice 3306 } 3307 3308 func nicStateFlagsToLinux(f stack.NICStateFlags) uint32 { 3309 var rv uint32 3310 if f.Up { 3311 rv |= linux.IFF_UP | linux.IFF_LOWER_UP 3312 } 3313 if f.Running { 3314 rv |= linux.IFF_RUNNING 3315 } 3316 if f.Promiscuous { 3317 rv |= linux.IFF_PROMISC 3318 } 3319 if f.Loopback { 3320 rv |= linux.IFF_LOOPBACK 3321 } 3322 return rv 3323 } 3324 3325 // State implements socket.Socket.State. State translates the internal state 3326 // returned by netstack to values defined by Linux. 3327 func (s *sock) State() uint32 { 3328 if s.family != linux.AF_INET && s.family != linux.AF_INET6 { 3329 // States not implemented for this socket's family. 
3330 return 0 3331 } 3332 3333 switch { 3334 case socket.IsTCP(s): 3335 // TCP socket. 3336 switch tcp.EndpointState(s.Endpoint.State()) { 3337 case tcp.StateEstablished: 3338 return linux.TCP_ESTABLISHED 3339 case tcp.StateSynSent: 3340 return linux.TCP_SYN_SENT 3341 case tcp.StateSynRecv: 3342 return linux.TCP_SYN_RECV 3343 case tcp.StateFinWait1: 3344 return linux.TCP_FIN_WAIT1 3345 case tcp.StateFinWait2: 3346 return linux.TCP_FIN_WAIT2 3347 case tcp.StateTimeWait: 3348 return linux.TCP_TIME_WAIT 3349 case tcp.StateClose, tcp.StateInitial, tcp.StateBound, tcp.StateConnecting, tcp.StateError: 3350 return linux.TCP_CLOSE 3351 case tcp.StateCloseWait: 3352 return linux.TCP_CLOSE_WAIT 3353 case tcp.StateLastAck: 3354 return linux.TCP_LAST_ACK 3355 case tcp.StateListen: 3356 return linux.TCP_LISTEN 3357 case tcp.StateClosing: 3358 return linux.TCP_CLOSING 3359 default: 3360 // Internal or unknown state. 3361 return 0 3362 } 3363 case socket.IsUDP(s): 3364 // UDP socket. 3365 switch transport.DatagramEndpointState(s.Endpoint.State()) { 3366 case transport.DatagramEndpointStateInitial, transport.DatagramEndpointStateBound, transport.DatagramEndpointStateClosed: 3367 return linux.TCP_CLOSE 3368 case transport.DatagramEndpointStateConnected: 3369 return linux.TCP_ESTABLISHED 3370 default: 3371 return 0 3372 } 3373 case socket.IsICMP(s): 3374 // TODO(b/112063468): Export states for ICMP sockets. 3375 case socket.IsRaw(s): 3376 // TODO(b/112063468): Export states for raw sockets. 3377 default: 3378 // Unknown transport protocol, how did we make this socket? 3379 log.Warningf("Unknown transport protocol for an existing socket: family=%v, type=%v, protocol=%v, internal type %v", s.family, s.skType, s.protocol, reflect.TypeOf(s.Endpoint).Elem()) 3380 return 0 3381 } 3382 3383 return 0 3384 } 3385 3386 // Type implements socket.Socket.Type. 
3387 func (s *sock) Type() (family int, skType linux.SockType, protocol int) { 3388 return s.family, s.skType, s.protocol 3389 } 3390 3391 // EventRegister implements waiter.Waitable. 3392 func (s *sock) EventRegister(e *waiter.Entry) error { 3393 s.Queue.EventRegister(e) 3394 return nil 3395 } 3396 3397 // EventUnregister implements waiter.Waitable.EventUnregister. 3398 func (s *sock) EventUnregister(e *waiter.Entry) { 3399 s.Queue.EventUnregister(e) 3400 }