github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/socket/netstack/netstack.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package netstack provides an implementation of the socket.Socket interface 16 // that is backed by a tcpip.Endpoint. 17 // 18 // It does not depend on any particular endpoint implementation, and thus can 19 // be used to expose certain endpoints to the sentry while leaving others out, 20 // for example, TCP endpoints and Unix-domain endpoints. 21 // 22 // Lock ordering: netstack => mm: ioSequenceReadWriter copies user memory inside 23 // tcpip.Endpoint.Write(). Netstack is allowed to (and does) hold locks during 24 // this operation. 
25 package netstack 26 27 import ( 28 "bytes" 29 "encoding/binary" 30 "fmt" 31 "io" 32 "io/ioutil" 33 "math" 34 "reflect" 35 "time" 36 37 "golang.org/x/sys/unix" 38 "github.com/SagerNet/gvisor/pkg/abi/linux" 39 "github.com/SagerNet/gvisor/pkg/abi/linux/errno" 40 "github.com/SagerNet/gvisor/pkg/context" 41 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 42 "github.com/SagerNet/gvisor/pkg/hostarch" 43 "github.com/SagerNet/gvisor/pkg/log" 44 "github.com/SagerNet/gvisor/pkg/marshal" 45 "github.com/SagerNet/gvisor/pkg/marshal/primitive" 46 "github.com/SagerNet/gvisor/pkg/metric" 47 "github.com/SagerNet/gvisor/pkg/sentry/arch" 48 "github.com/SagerNet/gvisor/pkg/sentry/fs" 49 "github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil" 50 "github.com/SagerNet/gvisor/pkg/sentry/inet" 51 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 52 ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time" 53 "github.com/SagerNet/gvisor/pkg/sentry/socket" 54 "github.com/SagerNet/gvisor/pkg/sentry/socket/netfilter" 55 "github.com/SagerNet/gvisor/pkg/sentry/unimpl" 56 "github.com/SagerNet/gvisor/pkg/sync" 57 "github.com/SagerNet/gvisor/pkg/syserr" 58 "github.com/SagerNet/gvisor/pkg/syserror" 59 "github.com/SagerNet/gvisor/pkg/tcpip" 60 "github.com/SagerNet/gvisor/pkg/tcpip/header" 61 "github.com/SagerNet/gvisor/pkg/tcpip/stack" 62 "github.com/SagerNet/gvisor/pkg/tcpip/transport/tcp" 63 "github.com/SagerNet/gvisor/pkg/tcpip/transport/udp" 64 "github.com/SagerNet/gvisor/pkg/usermem" 65 "github.com/SagerNet/gvisor/pkg/waiter" 66 ) 67 68 func mustCreateMetric(name, description string) *tcpip.StatCounter { 69 var cm tcpip.StatCounter 70 metric.MustRegisterCustomUint64Metric(name, true /* cumulative */, false /* sync */, description, cm.Value) 71 return &cm 72 } 73 74 func mustCreateGauge(name, description string) *tcpip.StatCounter { 75 var cm tcpip.StatCounter 76 metric.MustRegisterCustomUint64Metric(name, false /* cumulative */, false /* sync */, description, cm.Value) 77 return &cm 78 } 79 80 // 
Metrics contains metrics exported by netstack. 81 var Metrics = tcpip.Stats{ 82 DroppedPackets: mustCreateMetric("/netstack/dropped_packets", "Number of packets dropped at the transport layer."), 83 NICs: tcpip.NICStats{ 84 UnknownL3ProtocolRcvdPackets: mustCreateMetric("/netstack/nic/unknown_l3_protocol_received_packets", "Number of packets received that were for an unknown or unsupported L3 protocol."), 85 UnknownL4ProtocolRcvdPackets: mustCreateMetric("/netstack/nic/unknown_l4_protocol_received_packets", "Number of packets received that were for an unknown or unsupported L4 protocol."), 86 MalformedL4RcvdPackets: mustCreateMetric("/netstack/nic/malformed_l4_received_packets", "Number of packets received that failed L4 header parsing."), 87 Tx: tcpip.NICPacketStats{ 88 Packets: mustCreateMetric("/netstack/nic/tx/packets", "Number of packets transmitted."), 89 Bytes: mustCreateMetric("/netstack/nic/tx/bytes", "Number of bytes transmitted."), 90 }, 91 Rx: tcpip.NICPacketStats{ 92 Packets: mustCreateMetric("/netstack/nic/rx/packets", "Number of packets received."), 93 Bytes: mustCreateMetric("/netstack/nic/rx/bytes", "Number of bytes received."), 94 }, 95 DisabledRx: tcpip.NICPacketStats{ 96 Packets: mustCreateMetric("/netstack/nic/disabled_rx/packets", "Number of packets received on disabled NICs."), 97 Bytes: mustCreateMetric("/netstack/nic/disabled_rx/bytes", "Number of bytes received on disabled NICs."), 98 }, 99 Neighbor: tcpip.NICNeighborStats{ 100 UnreachableEntryLookups: mustCreateMetric("/netstack/nic/neighbor/unreachable_entry_loopups", "Number of lookups performed on a neighbor entry in Unreachable state."), 101 }, 102 }, 103 ICMP: tcpip.ICMPStats{ 104 V4: tcpip.ICMPv4Stats{ 105 PacketsSent: tcpip.ICMPv4SentPacketStats{ 106 ICMPv4PacketStats: tcpip.ICMPv4PacketStats{ 107 EchoRequest: mustCreateMetric("/netstack/icmp/v4/packets_sent/echo_request", "Number of ICMPv4 echo request packets sent."), 108 EchoReply: 
mustCreateMetric("/netstack/icmp/v4/packets_sent/echo_reply", "Number of ICMPv4 echo reply packets sent."), 109 DstUnreachable: mustCreateMetric("/netstack/icmp/v4/packets_sent/dst_unreachable", "Number of ICMPv4 destination unreachable packets sent."), 110 SrcQuench: mustCreateMetric("/netstack/icmp/v4/packets_sent/src_quench", "Number of ICMPv4 source quench packets sent."), 111 Redirect: mustCreateMetric("/netstack/icmp/v4/packets_sent/redirect", "Number of ICMPv4 redirect packets sent."), 112 TimeExceeded: mustCreateMetric("/netstack/icmp/v4/packets_sent/time_exceeded", "Number of ICMPv4 time exceeded packets sent."), 113 ParamProblem: mustCreateMetric("/netstack/icmp/v4/packets_sent/param_problem", "Number of ICMPv4 parameter problem packets sent."), 114 Timestamp: mustCreateMetric("/netstack/icmp/v4/packets_sent/timestamp", "Number of ICMPv4 timestamp packets sent."), 115 TimestampReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/timestamp_reply", "Number of ICMPv4 timestamp reply packets sent."), 116 InfoRequest: mustCreateMetric("/netstack/icmp/v4/packets_sent/info_request", "Number of ICMPv4 information request packets sent."), 117 InfoReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/info_reply", "Number of ICMPv4 information reply packets sent."), 118 }, 119 Dropped: mustCreateMetric("/netstack/icmp/v4/packets_sent/dropped", "Number of ICMPv4 packets dropped due to link layer errors."), 120 RateLimited: mustCreateMetric("/netstack/icmp/v4/packets_sent/rate_limited", "Number of ICMPv4 packets dropped due to rate limit being exceeded."), 121 }, 122 PacketsReceived: tcpip.ICMPv4ReceivedPacketStats{ 123 ICMPv4PacketStats: tcpip.ICMPv4PacketStats{ 124 EchoRequest: mustCreateMetric("/netstack/icmp/v4/packets_received/echo_request", "Number of ICMPv4 echo request packets received."), 125 EchoReply: mustCreateMetric("/netstack/icmp/v4/packets_received/echo_reply", "Number of ICMPv4 echo reply packets received."), 126 DstUnreachable: 
mustCreateMetric("/netstack/icmp/v4/packets_received/dst_unreachable", "Number of ICMPv4 destination unreachable packets received."), 127 SrcQuench: mustCreateMetric("/netstack/icmp/v4/packets_received/src_quench", "Number of ICMPv4 source quench packets received."), 128 Redirect: mustCreateMetric("/netstack/icmp/v4/packets_received/redirect", "Number of ICMPv4 redirect packets received."), 129 TimeExceeded: mustCreateMetric("/netstack/icmp/v4/packets_received/time_exceeded", "Number of ICMPv4 time exceeded packets received."), 130 ParamProblem: mustCreateMetric("/netstack/icmp/v4/packets_received/param_problem", "Number of ICMPv4 parameter problem packets received."), 131 Timestamp: mustCreateMetric("/netstack/icmp/v4/packets_received/timestamp", "Number of ICMPv4 timestamp packets received."), 132 TimestampReply: mustCreateMetric("/netstack/icmp/v4/packets_received/timestamp_reply", "Number of ICMPv4 timestamp reply packets received."), 133 InfoRequest: mustCreateMetric("/netstack/icmp/v4/packets_received/info_request", "Number of ICMPv4 information request packets received."), 134 InfoReply: mustCreateMetric("/netstack/icmp/v4/packets_received/info_reply", "Number of ICMPv4 information reply packets received."), 135 }, 136 Invalid: mustCreateMetric("/netstack/icmp/v4/packets_received/invalid", "Number of ICMPv4 packets received that the transport layer could not parse."), 137 }, 138 }, 139 V6: tcpip.ICMPv6Stats{ 140 PacketsSent: tcpip.ICMPv6SentPacketStats{ 141 ICMPv6PacketStats: tcpip.ICMPv6PacketStats{ 142 EchoRequest: mustCreateMetric("/netstack/icmp/v6/packets_sent/echo_request", "Number of ICMPv6 echo request packets sent."), 143 EchoReply: mustCreateMetric("/netstack/icmp/v6/packets_sent/echo_reply", "Number of ICMPv6 echo reply packets sent."), 144 DstUnreachable: mustCreateMetric("/netstack/icmp/v6/packets_sent/dst_unreachable", "Number of ICMPv6 destination unreachable packets sent."), 145 PacketTooBig: 
mustCreateMetric("/netstack/icmp/v6/packets_sent/packet_too_big", "Number of ICMPv6 packet too big packets sent."), 146 TimeExceeded: mustCreateMetric("/netstack/icmp/v6/packets_sent/time_exceeded", "Number of ICMPv6 time exceeded packets sent."), 147 ParamProblem: mustCreateMetric("/netstack/icmp/v6/packets_sent/param_problem", "Number of ICMPv6 parameter problem packets sent."), 148 RouterSolicit: mustCreateMetric("/netstack/icmp/v6/packets_sent/router_solicit", "Number of ICMPv6 router solicit packets sent."), 149 RouterAdvert: mustCreateMetric("/netstack/icmp/v6/packets_sent/router_advert", "Number of ICMPv6 router advert packets sent."), 150 NeighborSolicit: mustCreateMetric("/netstack/icmp/v6/packets_sent/neighbor_solicit", "Number of ICMPv6 neighbor solicit packets sent."), 151 NeighborAdvert: mustCreateMetric("/netstack/icmp/v6/packets_sent/neighbor_advert", "Number of ICMPv6 neighbor advert packets sent."), 152 RedirectMsg: mustCreateMetric("/netstack/icmp/v6/packets_sent/redirect_msg", "Number of ICMPv6 redirect message packets sent."), 153 MulticastListenerQuery: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_query", "Number of ICMPv6 multicast listener query packets sent."), 154 MulticastListenerReport: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_report", "Number of ICMPv6 multicast listener report packets sent."), 155 MulticastListenerDone: mustCreateMetric("/netstack/icmp/v6/packets_sent/multicast_listener_done", "Number of ICMPv6 multicast listener done packets sent."), 156 }, 157 Dropped: mustCreateMetric("/netstack/icmp/v6/packets_sent/dropped", "Number of ICMPv6 packets dropped due to link layer errors."), 158 RateLimited: mustCreateMetric("/netstack/icmp/v6/packets_sent/rate_limited", "Number of ICMPv6 packets dropped due to rate limit being exceeded."), 159 }, 160 PacketsReceived: tcpip.ICMPv6ReceivedPacketStats{ 161 ICMPv6PacketStats: tcpip.ICMPv6PacketStats{ 162 EchoRequest: 
mustCreateMetric("/netstack/icmp/v6/packets_received/echo_request", "Number of ICMPv6 echo request packets received."), 163 EchoReply: mustCreateMetric("/netstack/icmp/v6/packets_received/echo_reply", "Number of ICMPv6 echo reply packets received."), 164 DstUnreachable: mustCreateMetric("/netstack/icmp/v6/packets_received/dst_unreachable", "Number of ICMPv6 destination unreachable packets received."), 165 PacketTooBig: mustCreateMetric("/netstack/icmp/v6/packets_received/packet_too_big", "Number of ICMPv6 packet too big packets received."), 166 TimeExceeded: mustCreateMetric("/netstack/icmp/v6/packets_received/time_exceeded", "Number of ICMPv6 time exceeded packets received."), 167 ParamProblem: mustCreateMetric("/netstack/icmp/v6/packets_received/param_problem", "Number of ICMPv6 parameter problem packets received."), 168 RouterSolicit: mustCreateMetric("/netstack/icmp/v6/packets_received/router_solicit", "Number of ICMPv6 router solicit packets received."), 169 RouterAdvert: mustCreateMetric("/netstack/icmp/v6/packets_received/router_advert", "Number of ICMPv6 router advert packets received."), 170 NeighborSolicit: mustCreateMetric("/netstack/icmp/v6/packets_received/neighbor_solicit", "Number of ICMPv6 neighbor solicit packets received."), 171 NeighborAdvert: mustCreateMetric("/netstack/icmp/v6/packets_received/neighbor_advert", "Number of ICMPv6 neighbor advert packets received."), 172 RedirectMsg: mustCreateMetric("/netstack/icmp/v6/packets_received/redirect_msg", "Number of ICMPv6 redirect message packets received."), 173 MulticastListenerQuery: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_query", "Number of ICMPv6 multicast listener query packets received."), 174 MulticastListenerReport: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_report", "Number of ICMPv6 multicast listener report packets sent."), 175 MulticastListenerDone: mustCreateMetric("/netstack/icmp/v6/packets_received/multicast_listener_done", 
"Number of ICMPv6 multicast listener done packets sent."), 176 }, 177 Unrecognized: mustCreateMetric("/netstack/icmp/v6/packets_received/unrecognized", "Number of ICMPv6 packets received that the transport layer does not know how to parse."), 178 Invalid: mustCreateMetric("/netstack/icmp/v6/packets_received/invalid", "Number of ICMPv6 packets received that the transport layer could not parse."), 179 RouterOnlyPacketsDroppedByHost: mustCreateMetric("/netstack/icmp/v6/packets_received/router_only_packets_dropped_by_host", "Number of ICMPv6 packets dropped due to being router-specific packets."), 180 }, 181 }, 182 }, 183 IGMP: tcpip.IGMPStats{ 184 PacketsSent: tcpip.IGMPSentPacketStats{ 185 IGMPPacketStats: tcpip.IGMPPacketStats{ 186 MembershipQuery: mustCreateMetric("/netstack/igmp/packets_sent/membership_query", "Number of IGMP Membership Query messages sent."), 187 V1MembershipReport: mustCreateMetric("/netstack/igmp/packets_sent/v1_membership_report", "Number of IGMPv1 Membership Report messages sent."), 188 V2MembershipReport: mustCreateMetric("/netstack/igmp/packets_sent/v2_membership_report", "Number of IGMPv2 Membership Report messages sent."), 189 LeaveGroup: mustCreateMetric("/netstack/igmp/packets_sent/leave_group", "Number of IGMP Leave Group messages sent."), 190 }, 191 Dropped: mustCreateMetric("/netstack/igmp/packets_sent/dropped", "Number of IGMP packets dropped due to link layer errors."), 192 }, 193 PacketsReceived: tcpip.IGMPReceivedPacketStats{ 194 IGMPPacketStats: tcpip.IGMPPacketStats{ 195 MembershipQuery: mustCreateMetric("/netstack/igmp/packets_received/membership_query", "Number of IGMP Membership Query messages received."), 196 V1MembershipReport: mustCreateMetric("/netstack/igmp/packets_received/v1_membership_report", "Number of IGMPv1 Membership Report messages received."), 197 V2MembershipReport: mustCreateMetric("/netstack/igmp/packets_received/v2_membership_report", "Number of IGMPv2 Membership Report messages received."), 198 
LeaveGroup: mustCreateMetric("/netstack/igmp/packets_received/leave_group", "Number of IGMP Leave Group messages received."), 199 }, 200 Invalid: mustCreateMetric("/netstack/igmp/packets_received/invalid", "Number of IGMP packets received that could not be parsed."), 201 ChecksumErrors: mustCreateMetric("/netstack/igmp/packets_received/checksum_errors", "Number of received IGMP packets with bad checksums."), 202 Unrecognized: mustCreateMetric("/netstack/igmp/packets_received/unrecognized", "Number of unrecognized IGMP packets received."), 203 }, 204 }, 205 IP: tcpip.IPStats{ 206 PacketsReceived: mustCreateMetric("/netstack/ip/packets_received", "Number of IP packets received from the link layer in nic.DeliverNetworkPacket."), 207 DisabledPacketsReceived: mustCreateMetric("/netstack/ip/disabled_packets_received", "Number of IP packets received from the link layer when the IP layer is disabled."), 208 InvalidDestinationAddressesReceived: mustCreateMetric("/netstack/ip/invalid_addresses_received", "Number of IP packets received with an unknown or invalid destination address."), 209 InvalidSourceAddressesReceived: mustCreateMetric("/netstack/ip/invalid_source_addresses_received", "Number of IP packets received with an unknown or invalid source address."), 210 PacketsDelivered: mustCreateMetric("/netstack/ip/packets_delivered", "Number of incoming IP packets that are successfully delivered to the transport layer via HandlePacket."), 211 PacketsSent: mustCreateMetric("/netstack/ip/packets_sent", "Number of IP packets sent via WritePacket."), 212 OutgoingPacketErrors: mustCreateMetric("/netstack/ip/outgoing_packet_errors", "Number of IP packets which failed to write to a link-layer endpoint."), 213 MalformedPacketsReceived: mustCreateMetric("/netstack/ip/malformed_packets_received", "Number of IP packets which failed IP header validation checks."), 214 MalformedFragmentsReceived: mustCreateMetric("/netstack/ip/malformed_fragments_received", "Number of IP fragments which 
failed IP fragment validation checks."), 215 IPTablesPreroutingDropped: mustCreateMetric("/netstack/ip/iptables/prerouting_dropped", "Number of IP packets dropped in the Prerouting chain."), 216 IPTablesInputDropped: mustCreateMetric("/netstack/ip/iptables/input_dropped", "Number of IP packets dropped in the Input chain."), 217 IPTablesOutputDropped: mustCreateMetric("/netstack/ip/iptables/output_dropped", "Number of IP packets dropped in the Output chain."), 218 OptionTimestampReceived: mustCreateMetric("/netstack/ip/options/timestamp_received", "Number of timestamp options found in received IP packets."), 219 OptionRecordRouteReceived: mustCreateMetric("/netstack/ip/options/record_route_received", "Number of record route options found in received IP packets."), 220 OptionRouterAlertReceived: mustCreateMetric("/netstack/ip/options/router_alert_received", "Number of router alert options found in received IP packets."), 221 OptionUnknownReceived: mustCreateMetric("/netstack/ip/options/unknown_received", "Number of unknown options found in received IP packets."), 222 Forwarding: tcpip.IPForwardingStats{ 223 Unrouteable: mustCreateMetric("/netstack/ip/forwarding/unrouteable", "Number of IP packets received which couldn't be routed and thus were not forwarded."), 224 ExhaustedTTL: mustCreateMetric("/netstack/ip/forwarding/exhausted_ttl", "Number of IP packets received which could not be forwarded due to an exhausted TTL."), 225 LinkLocalSource: mustCreateMetric("/netstack/ip/forwarding/link_local_source_address", "Number of IP packets received which could not be forwarded due to a link-local source address."), 226 LinkLocalDestination: mustCreateMetric("/netstack/ip/forwarding/link_local_destination_address", "Number of IP packets received which could not be forwarded due to a link-local destination address."), 227 ExtensionHeaderProblem: mustCreateMetric("/netstack/ip/forwarding/extension_header_problem", "Number of IP packets received which could not be forwarded due 
to a problem processing their IPv6 extension headers."), 228 PacketTooBig: mustCreateMetric("/netstack/ip/forwarding/packet_too_big", "Number of IP packets received which could not be forwarded because they could not fit within the outgoing MTU."), 229 HostUnreachable: mustCreateMetric("/netstack/ip/forwarding/host_unreachable", "Number of IP packets received which could not be forwarded due to unresolvable next hop."), 230 Errors: mustCreateMetric("/netstack/ip/forwarding/errors", "Number of IP packets which couldn't be forwarded."), 231 }, 232 }, 233 ARP: tcpip.ARPStats{ 234 PacketsReceived: mustCreateMetric("/netstack/arp/packets_received", "Number of ARP packets received from the link layer."), 235 DisabledPacketsReceived: mustCreateMetric("/netstack/arp/disabled_packets_received", "Number of ARP packets received from the link layer when the ARP layer is disabled."), 236 MalformedPacketsReceived: mustCreateMetric("/netstack/arp/malformed_packets_received", "Number of ARP packets which failed ARP header validation checks."), 237 RequestsReceived: mustCreateMetric("/netstack/arp/requests_received", "Number of ARP requests received."), 238 RequestsReceivedUnknownTargetAddress: mustCreateMetric("/netstack/arp/requests_received_unknown_addr", "Number of ARP requests received with an unknown target address."), 239 OutgoingRequestInterfaceHasNoLocalAddressErrors: mustCreateMetric("/netstack/arp/outgoing_requests_iface_has_no_addr", "Number of failed attempts to send an ARP request with an interface that has no network address."), 240 OutgoingRequestBadLocalAddressErrors: mustCreateMetric("/netstack/arp/outgoing_requests_invalid_local_addr", "Number of failed attempts to send an ARP request with a provided local address that is invalid."), 241 OutgoingRequestsDropped: mustCreateMetric("/netstack/arp/outgoing_requests_dropped", "Number of ARP requests which failed to write to a link-layer endpoint."), 242 OutgoingRequestsSent: 
mustCreateMetric("/netstack/arp/outgoing_requests_sent", "Number of ARP requests sent."), 243 RepliesReceived: mustCreateMetric("/netstack/arp/replies_received", "Number of ARP replies received."), 244 OutgoingRepliesDropped: mustCreateMetric("/netstack/arp/outgoing_replies_dropped", "Number of ARP replies which failed to write to a link-layer endpoint."), 245 OutgoingRepliesSent: mustCreateMetric("/netstack/arp/outgoing_replies_sent", "Number of ARP replies sent."), 246 }, 247 TCP: tcpip.TCPStats{ 248 ActiveConnectionOpenings: mustCreateMetric("/netstack/tcp/active_connection_openings", "Number of connections opened successfully via Connect."), 249 PassiveConnectionOpenings: mustCreateMetric("/netstack/tcp/passive_connection_openings", "Number of connections opened successfully via Listen."), 250 CurrentEstablished: mustCreateGauge("/netstack/tcp/current_established", "Number of connections in ESTABLISHED state now."), 251 CurrentConnected: mustCreateGauge("/netstack/tcp/current_open", "Number of connections that are in connected state."), 252 EstablishedResets: mustCreateMetric("/netstack/tcp/established_resets", "Number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state"), 253 EstablishedClosed: mustCreateMetric("/netstack/tcp/established_closed", "Number of times established TCP connections made a transition to CLOSED state."), 254 EstablishedTimedout: mustCreateMetric("/netstack/tcp/established_timedout", "Number of times an established connection was reset because of keep-alive time out."), 255 ListenOverflowSynDrop: mustCreateMetric("/netstack/tcp/listen_overflow_syn_drop", "Number of times the listen queue overflowed and a SYN was dropped."), 256 ListenOverflowAckDrop: mustCreateMetric("/netstack/tcp/listen_overflow_ack_drop", "Number of times the listen queue overflowed and the final ACK in the handshake was dropped."), 257 ListenOverflowSynCookieSent: 
mustCreateMetric("/netstack/tcp/listen_overflow_syn_cookie_sent", "Number of times a SYN cookie was sent."), 258 ListenOverflowSynCookieRcvd: mustCreateMetric("/netstack/tcp/listen_overflow_syn_cookie_rcvd", "Number of times a SYN cookie was received."), 259 ListenOverflowInvalidSynCookieRcvd: mustCreateMetric("/netstack/tcp/listen_overflow_invalid_syn_cookie_rcvd", "Number of times an invalid SYN cookie was received."), 260 FailedConnectionAttempts: mustCreateMetric("/netstack/tcp/failed_connection_attempts", "Number of calls to Connect or Listen (active and passive openings, respectively) that end in an error."), 261 ValidSegmentsReceived: mustCreateMetric("/netstack/tcp/valid_segments_received", "Number of TCP segments received that the transport layer successfully parsed."), 262 InvalidSegmentsReceived: mustCreateMetric("/netstack/tcp/invalid_segments_received", "Number of TCP segments received that the transport layer could not parse."), 263 SegmentsSent: mustCreateMetric("/netstack/tcp/segments_sent", "Number of TCP segments sent."), 264 SegmentSendErrors: mustCreateMetric("/netstack/tcp/segment_send_errors", "Number of TCP segments failed to be sent."), 265 ResetsSent: mustCreateMetric("/netstack/tcp/resets_sent", "Number of TCP resets sent."), 266 ResetsReceived: mustCreateMetric("/netstack/tcp/resets_received", "Number of TCP resets received."), 267 Retransmits: mustCreateMetric("/netstack/tcp/retransmits", "Number of TCP segments retransmitted."), 268 FastRecovery: mustCreateMetric("/netstack/tcp/fast_recovery", "Number of times fast recovery was used to recover from packet loss."), 269 SACKRecovery: mustCreateMetric("/netstack/tcp/sack_recovery", "Number of times SACK recovery was used to recover from packet loss."), 270 TLPRecovery: mustCreateMetric("/netstack/tcp/tlp_recovery", "Number of times tail loss probe triggers recovery from tail loss."), 271 SlowStartRetransmits: mustCreateMetric("/netstack/tcp/slow_start_retransmits", "Number of segments 
retransmitted in slow start mode."), 272 FastRetransmit: mustCreateMetric("/netstack/tcp/fast_retransmit", "Number of TCP segments which were fast retransmitted."), 273 Timeouts: mustCreateMetric("/netstack/tcp/timeouts", "Number of times RTO expired."), 274 ChecksumErrors: mustCreateMetric("/netstack/tcp/checksum_errors", "Number of segments dropped due to bad checksums."), 275 FailedPortReservations: mustCreateMetric("/netstack/tcp/failed_port_reservations", "Number of time TCP failed to reserve a port."), 276 }, 277 UDP: tcpip.UDPStats{ 278 PacketsReceived: mustCreateMetric("/netstack/udp/packets_received", "Number of UDP datagrams received via HandlePacket."), 279 UnknownPortErrors: mustCreateMetric("/netstack/udp/unknown_port_errors", "Number of incoming UDP datagrams dropped because they did not have a known destination port."), 280 ReceiveBufferErrors: mustCreateMetric("/netstack/udp/receive_buffer_errors", "Number of incoming UDP datagrams dropped due to the receiving buffer being in an invalid state."), 281 MalformedPacketsReceived: mustCreateMetric("/netstack/udp/malformed_packets_received", "Number of incoming UDP datagrams dropped due to the UDP header being in a malformed state."), 282 PacketsSent: mustCreateMetric("/netstack/udp/packets_sent", "Number of UDP datagrams sent."), 283 PacketSendErrors: mustCreateMetric("/netstack/udp/packet_send_errors", "Number of UDP datagrams failed to be sent."), 284 ChecksumErrors: mustCreateMetric("/netstack/udp/checksum_errors", "Number of UDP datagrams dropped due to bad checksums."), 285 }, 286 } 287 288 // DefaultTTL is linux's default TTL. All network protocols in all stacks used 289 // with this package must have this value set as their default TTL. 
const DefaultTTL = 64

// sizeOfInt32 is the size of an int32 in bytes.
const sizeOfInt32 int = 4

// errStackType is the EINVAL error used when a value was expected to be a
// netstack.Stack but was not (see its message).
var errStackType = syserr.New("expected but did not receive a netstack.Stack", errno.EINVAL)

// commonEndpoint represents the intersection of a tcpip.Endpoint and a
// transport.Endpoint.
type commonEndpoint interface {
	// GetLocalAddress implements tcpip.Endpoint.GetLocalAddress and
	// transport.Endpoint.GetLocalAddress.
	GetLocalAddress() (tcpip.FullAddress, tcpip.Error)

	// GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress and
	// transport.Endpoint.GetRemoteAddress.
	GetRemoteAddress() (tcpip.FullAddress, tcpip.Error)

	// Readiness implements tcpip.Endpoint.Readiness and
	// transport.Endpoint.Readiness.
	Readiness(mask waiter.EventMask) waiter.EventMask

	// SetSockOpt implements tcpip.Endpoint.SetSockOpt and
	// transport.Endpoint.SetSockOpt.
	SetSockOpt(tcpip.SettableSocketOption) tcpip.Error

	// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt and
	// transport.Endpoint.SetSockOptInt.
	SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error

	// GetSockOpt implements tcpip.Endpoint.GetSockOpt and
	// transport.Endpoint.GetSockOpt.
	GetSockOpt(tcpip.GettableSocketOption) tcpip.Error

	// GetSockOptInt implements tcpip.Endpoint.GetSockOptInt and
	// transport.Endpoint.GetSockOptInt.
	GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error)

	// State returns a socket's lifecycle state. The returned value is
	// protocol-specific and is primarily used for diagnostics.
	State() uint32

	// LastError implements tcpip.Endpoint.LastError and
	// transport.Endpoint.LastError.
	LastError() tcpip.Error

	// SocketOptions implements tcpip.Endpoint.SocketOptions and
	// transport.Endpoint.SocketOptions.
	SocketOptions() *tcpip.SocketOptions
}

// LINT.IfChange

// SocketOperations encapsulates all the state needed to represent a network stack
// endpoint in the kernel context.
//
// It embeds the fsutil helpers listed below (all excluded from save/restore
// via the nosave tag) together with socketOpsCommon, which carries the actual
// socket state.
//
// +stateify savable
type SocketOperations struct {
	fsutil.FilePipeSeek             `state:"nosave"`
	fsutil.FileNotDirReaddir        `state:"nosave"`
	fsutil.FileNoopFlush            `state:"nosave"`
	fsutil.FileNoFsync              `state:"nosave"`
	fsutil.FileNoMMap               `state:"nosave"`
	fsutil.FileUseInodeUnstableAttr `state:"nosave"`

	socketOpsCommon
}

// socketOpsCommon contains the socket operations common to VFS1 and VFS2.
//
// +stateify savable
type socketOpsCommon struct {
	socket.SendReceiveTimeout
	*waiter.Queue

	// family, skType and protocol record the values the socket was created
	// with; Endpoint is the tcpip.Endpoint backing the socket.
	family   int
	Endpoint tcpip.Endpoint
	skType   linux.SockType
	protocol int

	// readMu protects access to the below fields.
	readMu sync.Mutex `state:"nosave"`

	// sockOptTimestamp corresponds to SO_TIMESTAMP. When true, timestamps
	// of returned messages can be returned via control messages. When
	// false, the same timestamp is instead stored and can be read via the
	// SIOCGSTAMP ioctl. It is protected by readMu. See socket(7).
	sockOptTimestamp bool
	// timestampValid indicates whether timestamp for SIOCGSTAMP has been
	// set. It is protected by readMu.
	timestampValid bool
	// timestampNS holds the timestamp to use with SIOCGSTAMP. It is only
	// valid when timestampValid is true. It is protected by readMu.
	timestampNS int64

	// TODO(b/153685824): Move this to SocketOptions.
	// sockOptInq corresponds to TCP_INQ.
	sockOptInq bool
}

// New creates a new endpoint socket.
390 func New(t *kernel.Task, family int, skType linux.SockType, protocol int, queue *waiter.Queue, endpoint tcpip.Endpoint) (*fs.File, *syserr.Error) { 391 if skType == linux.SOCK_STREAM { 392 endpoint.SocketOptions().SetDelayOption(true) 393 } 394 395 dirent := socket.NewDirent(t, netstackDevice) 396 defer dirent.DecRef(t) 397 return fs.NewFile(t, dirent, fs.FileFlags{Read: true, Write: true, NonSeekable: true}, &SocketOperations{ 398 socketOpsCommon: socketOpsCommon{ 399 Queue: queue, 400 family: family, 401 Endpoint: endpoint, 402 skType: skType, 403 protocol: protocol, 404 }, 405 }), nil 406 } 407 408 var sockAddrInetSize = (*linux.SockAddrInet)(nil).SizeBytes() 409 var sockAddrInet6Size = (*linux.SockAddrInet6)(nil).SizeBytes() 410 var sockAddrLinkSize = (*linux.SockAddrLink)(nil).SizeBytes() 411 412 // bytesToIPAddress converts an IPv4 or IPv6 address from the user to the 413 // netstack representation taking any addresses into account. 414 func bytesToIPAddress(addr []byte) tcpip.Address { 415 if bytes.Equal(addr, make([]byte, 4)) || bytes.Equal(addr, make([]byte, 16)) { 416 return "" 417 } 418 return tcpip.Address(addr) 419 } 420 421 func (s *socketOpsCommon) isPacketBased() bool { 422 return s.skType == linux.SOCK_DGRAM || s.skType == linux.SOCK_SEQPACKET || s.skType == linux.SOCK_RDM || s.skType == linux.SOCK_RAW 423 } 424 425 // Release implements fs.FileOperations.Release. 426 func (s *socketOpsCommon) Release(ctx context.Context) { 427 e, ch := waiter.NewChannelEntry(nil) 428 s.EventRegister(&e, waiter.EventHUp|waiter.EventErr) 429 defer s.EventUnregister(&e) 430 431 s.Endpoint.Close() 432 433 // SO_LINGER option is valid only for TCP. For other socket types 434 // return after endpoint close. 
435 if family, skType, _ := s.Type(); skType != linux.SOCK_STREAM || (family != linux.AF_INET && family != linux.AF_INET6) { 436 return 437 } 438 439 v := s.Endpoint.SocketOptions().GetLinger() 440 // The case for zero timeout is handled in tcp endpoint close function. 441 // Close is blocked until either: 442 // 1. The endpoint state is not in any of the states: FIN-WAIT1, 443 // CLOSING and LAST_ACK. 444 // 2. Timeout is reached. 445 if v.Enabled && v.Timeout != 0 { 446 t := kernel.TaskFromContext(ctx) 447 start := t.Kernel().MonotonicClock().Now() 448 deadline := start.Add(v.Timeout) 449 t.BlockWithDeadline(ch, true, deadline) 450 } 451 } 452 453 // Read implements fs.FileOperations.Read. 454 func (s *SocketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) { 455 if dst.NumBytes() == 0 { 456 return 0, nil 457 } 458 n, _, _, _, _, err := s.nonBlockingRead(ctx, dst, false, false, false) 459 if err == syserr.ErrWouldBlock { 460 return int64(n), syserror.ErrWouldBlock 461 } 462 if err != nil { 463 return 0, err.ToError() 464 } 465 return int64(n), nil 466 } 467 468 // WriteTo implements fs.FileOperations.WriteTo. 469 func (s *SocketOperations) WriteTo(ctx context.Context, _ *fs.File, dst io.Writer, count int64, dup bool) (int64, error) { 470 s.readMu.Lock() 471 defer s.readMu.Unlock() 472 473 w := tcpip.LimitedWriter{ 474 W: dst, 475 N: count, 476 } 477 478 // This may return a blocking error. 479 res, err := s.Endpoint.Read(&w, tcpip.ReadOptions{ 480 Peek: dup, 481 }) 482 if err != nil { 483 return 0, syserr.TranslateNetstackError(err).ToError() 484 } 485 return int64(res.Count), nil 486 } 487 488 // Write implements fs.FileOperations.Write. 
489 func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) { 490 r := src.Reader(ctx) 491 n, err := s.Endpoint.Write(r, tcpip.WriteOptions{}) 492 if _, ok := err.(*tcpip.ErrWouldBlock); ok { 493 return 0, syserror.ErrWouldBlock 494 } 495 if err != nil { 496 return 0, syserr.TranslateNetstackError(err).ToError() 497 } 498 499 if n < src.NumBytes() { 500 return n, syserror.ErrWouldBlock 501 } 502 503 return n, nil 504 } 505 506 var _ tcpip.Payloader = (*limitedPayloader)(nil) 507 508 type limitedPayloader struct { 509 inner io.LimitedReader 510 err error 511 } 512 513 func (l *limitedPayloader) Read(p []byte) (int, error) { 514 n, err := l.inner.Read(p) 515 l.err = err 516 return n, err 517 } 518 519 func (l *limitedPayloader) Len() int { 520 return int(l.inner.N) 521 } 522 523 // ReadFrom implements fs.FileOperations.ReadFrom. 524 func (s *SocketOperations) ReadFrom(ctx context.Context, _ *fs.File, r io.Reader, count int64) (int64, error) { 525 f := limitedPayloader{ 526 inner: io.LimitedReader{ 527 R: r, 528 N: count, 529 }, 530 } 531 n, err := s.Endpoint.Write(&f, tcpip.WriteOptions{ 532 // Reads may be destructive but should be very fast, 533 // so we can't release the lock while copying data. 534 Atomic: true, 535 }) 536 if _, ok := err.(*tcpip.ErrBadBuffer); ok { 537 return n, f.err 538 } 539 return n, syserr.TranslateNetstackError(err).ToError() 540 } 541 542 // Readiness returns a mask of ready events for socket s. 
543 func (s *socketOpsCommon) Readiness(mask waiter.EventMask) waiter.EventMask { 544 return s.Endpoint.Readiness(mask) 545 } 546 547 func (s *socketOpsCommon) checkFamily(family uint16, exact bool) *syserr.Error { 548 if family == uint16(s.family) { 549 return nil 550 } 551 if !exact && family == linux.AF_INET && s.family == linux.AF_INET6 { 552 if !s.Endpoint.SocketOptions().GetV6Only() { 553 return nil 554 } 555 } 556 return syserr.ErrInvalidArgument 557 } 558 559 // mapFamily maps the AF_INET ANY address to the IPv4-mapped IPv6 ANY if the 560 // receiver's family is AF_INET6. 561 // 562 // This is a hack to work around the fact that both IPv4 and IPv6 ANY are 563 // represented by the empty string. 564 // 565 // TODO(github.com/SagerNet/issue/1556): remove this function. 566 func (s *socketOpsCommon) mapFamily(addr tcpip.FullAddress, family uint16) tcpip.FullAddress { 567 if len(addr.Addr) == 0 && s.family == linux.AF_INET6 && family == linux.AF_INET { 568 addr.Addr = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\x00\x00\x00\x00" 569 } 570 return addr 571 } 572 573 // Connect implements the linux syscall connect(2) for sockets backed by 574 // tpcip.Endpoint. 575 func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error { 576 addr, family, err := socket.AddressAndFamily(sockaddr) 577 if err != nil { 578 return err 579 } 580 581 if family == linux.AF_UNSPEC { 582 err := s.Endpoint.Disconnect() 583 if _, ok := err.(*tcpip.ErrNotSupported); ok { 584 return syserr.ErrAddressFamilyNotSupported 585 } 586 return syserr.TranslateNetstackError(err) 587 } 588 589 if err := s.checkFamily(family, false /* exact */); err != nil { 590 return err 591 } 592 addr = s.mapFamily(addr, family) 593 594 // Always return right away in the non-blocking case. 
595 if !blocking { 596 return syserr.TranslateNetstackError(s.Endpoint.Connect(addr)) 597 } 598 599 // Register for notification when the endpoint becomes writable, then 600 // initiate the connection. 601 e, ch := waiter.NewChannelEntry(nil) 602 s.EventRegister(&e, waiter.WritableEvents) 603 defer s.EventUnregister(&e) 604 605 switch err := s.Endpoint.Connect(addr); err.(type) { 606 case *tcpip.ErrConnectStarted, *tcpip.ErrAlreadyConnecting: 607 case *tcpip.ErrNoPortAvailable: 608 if (s.family == unix.AF_INET || s.family == unix.AF_INET6) && s.skType == linux.SOCK_STREAM { 609 // TCP unlike UDP returns EADDRNOTAVAIL when it can't 610 // find an available local ephemeral port. 611 return syserr.ErrAddressNotAvailable 612 } 613 return syserr.TranslateNetstackError(err) 614 default: 615 return syserr.TranslateNetstackError(err) 616 } 617 618 // It's pending, so we have to wait for a notification, and fetch the 619 // result once the wait completes. 620 if err := t.Block(ch); err != nil { 621 return syserr.FromError(err) 622 } 623 624 // Call Connect() again after blocking to find connect's result. 625 return syserr.TranslateNetstackError(s.Endpoint.Connect(addr)) 626 } 627 628 // Bind implements the linux syscall bind(2) for sockets backed by 629 // tcpip.Endpoint. 630 func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { 631 if len(sockaddr) < 2 { 632 return syserr.ErrInvalidArgument 633 } 634 635 family := hostarch.ByteOrder.Uint16(sockaddr) 636 var addr tcpip.FullAddress 637 638 // Bind for AF_PACKET requires only family, protocol and ifindex. 639 // In function AddressAndFamily, we check the address length which is 640 // not needed for AF_PACKET bind. 
641 if family == linux.AF_PACKET { 642 var a linux.SockAddrLink 643 if len(sockaddr) < sockAddrLinkSize { 644 return syserr.ErrInvalidArgument 645 } 646 a.UnmarshalBytes(sockaddr[:sockAddrLinkSize]) 647 648 if a.Protocol != uint16(s.protocol) { 649 return syserr.ErrInvalidArgument 650 } 651 652 addr = tcpip.FullAddress{ 653 NIC: tcpip.NICID(a.InterfaceIndex), 654 Addr: tcpip.Address(a.HardwareAddr[:header.EthernetAddressSize]), 655 } 656 } else { 657 var err *syserr.Error 658 addr, family, err = socket.AddressAndFamily(sockaddr) 659 if err != nil { 660 return err 661 } 662 663 if err = s.checkFamily(family, true /* exact */); err != nil { 664 return err 665 } 666 667 addr = s.mapFamily(addr, family) 668 } 669 670 // Issue the bind request to the endpoint. 671 err := s.Endpoint.Bind(addr) 672 if _, ok := err.(*tcpip.ErrNoPortAvailable); ok { 673 // Bind always returns EADDRINUSE irrespective of if the specified port was 674 // already bound or if an ephemeral port was requested but none were 675 // available. 676 // 677 // *tcpip.ErrNoPortAvailable is mapped to EAGAIN in syserr package because 678 // UDP connect returns EAGAIN on ephemeral port exhaustion. 679 // 680 // TCP connect returns EADDRNOTAVAIL on ephemeral port exhaustion. 681 err = &tcpip.ErrPortInUse{} 682 } 683 684 return syserr.TranslateNetstackError(err) 685 } 686 687 // Listen implements the linux syscall listen(2) for sockets backed by 688 // tcpip.Endpoint. 689 func (s *socketOpsCommon) Listen(t *kernel.Task, backlog int) *syserr.Error { 690 return syserr.TranslateNetstackError(s.Endpoint.Listen(backlog)) 691 } 692 693 // blockingAccept implements a blocking version of accept(2), that is, if no 694 // connections are ready to be accept, it will block until one becomes ready. 695 func (s *socketOpsCommon) blockingAccept(t *kernel.Task, peerAddr *tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, *syserr.Error) { 696 // Register for notifications. 
697 e, ch := waiter.NewChannelEntry(nil) 698 s.EventRegister(&e, waiter.ReadableEvents) 699 defer s.EventUnregister(&e) 700 701 // Try to accept the connection again; if it fails, then wait until we 702 // get a notification. 703 for { 704 ep, wq, err := s.Endpoint.Accept(peerAddr) 705 if _, ok := err.(*tcpip.ErrWouldBlock); !ok { 706 return ep, wq, syserr.TranslateNetstackError(err) 707 } 708 709 if err := t.Block(ch); err != nil { 710 return nil, nil, syserr.FromError(err) 711 } 712 } 713 } 714 715 // Accept implements the linux syscall accept(2) for sockets backed by 716 // tcpip.Endpoint. 717 func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) { 718 var peerAddr *tcpip.FullAddress 719 if peerRequested { 720 peerAddr = &tcpip.FullAddress{} 721 } 722 ep, wq, terr := s.Endpoint.Accept(peerAddr) 723 if terr != nil { 724 if _, ok := terr.(*tcpip.ErrWouldBlock); !ok || !blocking { 725 return 0, nil, 0, syserr.TranslateNetstackError(terr) 726 } 727 728 var err *syserr.Error 729 ep, wq, err = s.blockingAccept(t, peerAddr) 730 if err != nil { 731 return 0, nil, 0, err 732 } 733 } 734 735 ns, err := New(t, s.family, s.skType, s.protocol, wq, ep) 736 if err != nil { 737 return 0, nil, 0, err 738 } 739 defer ns.DecRef(t) 740 741 if flags&linux.SOCK_NONBLOCK != 0 { 742 flags := ns.Flags() 743 flags.NonBlocking = true 744 ns.SetFlags(flags.Settable()) 745 } 746 747 var addr linux.SockAddr 748 var addrLen uint32 749 if peerAddr != nil { 750 addr, addrLen = socket.ConvertAddress(s.family, *peerAddr) 751 } 752 753 fd, e := t.NewFDFrom(0, ns, kernel.FDFlags{ 754 CloseOnExec: flags&linux.SOCK_CLOEXEC != 0, 755 }) 756 757 t.Kernel().RecordSocket(ns) 758 759 return fd, addr, addrLen, syserr.FromError(e) 760 } 761 762 // ConvertShutdown converts Linux shutdown flags into tcpip shutdown flags. 
func ConvertShutdown(how int) (tcpip.ShutdownFlags, *syserr.Error) {
	var f tcpip.ShutdownFlags
	switch how {
	case linux.SHUT_RD:
		f = tcpip.ShutdownRead
	case linux.SHUT_WR:
		f = tcpip.ShutdownWrite
	case linux.SHUT_RDWR:
		f = tcpip.ShutdownRead | tcpip.ShutdownWrite
	default:
		// Any other value is rejected.
		return 0, syserr.ErrInvalidArgument
	}
	return f, nil
}

// Shutdown implements the linux syscall shutdown(2) for sockets backed by
// tcpip.Endpoint.
func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error {
	f, err := ConvertShutdown(how)
	if err != nil {
		return err
	}

	// Issue shutdown request.
	return syserr.TranslateNetstackError(s.Endpoint.Shutdown(f))
}

// GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
// tcpip.Endpoint.
//
// SO_TIMESTAMP and TCP_INQ are answered from state kept on the socket itself
// (under readMu); every other option is delegated to the package-level
// GetSockOpt.
func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
	// TODO(b/78348848): Unlike other socket options, SO_TIMESTAMP is
	// implemented specifically for netstack.SocketOperations rather than
	// commonEndpoint. commonEndpoint should be extended to support socket
	// options where the implementation is not shared, as unix sockets need
	// their own support for SO_TIMESTAMP.
	if level == linux.SOL_SOCKET && name == linux.SO_TIMESTAMP {
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}
		val := primitive.Int32(0)
		s.readMu.Lock()
		defer s.readMu.Unlock()
		if s.sockOptTimestamp {
			val = 1
		}
		return &val, nil
	}
	if level == linux.SOL_TCP && name == linux.TCP_INQ {
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}
		val := primitive.Int32(0)
		s.readMu.Lock()
		defer s.readMu.Unlock()
		if s.sockOptInq {
			val = 1
		}
		return &val, nil
	}

	return GetSockOpt(t, s, s.Endpoint, s.family, s.skType, level, name, outPtr, outLen)
}

// GetSockOpt can be used to implement the linux syscall getsockopt(2) for
// sockets backed by a commonEndpoint.
//
// It dispatches on level to the per-level helpers. Levels that are
// recognized but not implemented emit an unimplemented event, and those as
// well as unknown levels return ErrProtocolNotAvailable.
func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
	switch level {
	case linux.SOL_SOCKET:
		return getSockOptSocket(t, s, ep, family, skType, name, outLen)

	case linux.SOL_TCP:
		return getSockOptTCP(t, s, ep, name, outLen)

	case linux.SOL_IPV6:
		return getSockOptIPv6(t, s, ep, name, outPtr, outLen)

	case linux.SOL_IP:
		return getSockOptIP(t, s, ep, name, outPtr, outLen, family)

	case linux.SOL_UDP,
		linux.SOL_ICMPV6,
		linux.SOL_RAW,
		linux.SOL_PACKET:

		t.Kernel().EmitUnimplementedEvent(t)
	}

	return nil, syserr.ErrProtocolNotAvailable
}

// boolToInt32 converts a boolean to 1 (true) or 0 (false).
func boolToInt32(v bool) int32 {
	if v {
		return 1
	}
	return 0
}

// getSockOptSocket implements GetSockOpt when level is SOL_SOCKET.
//
// Most options require outLen to be at least the size of the value being
// returned and fail with ErrInvalidArgument otherwise. Unhandled options emit
// an unimplemented event and return ErrProtocolNotAvailable.
func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, name, outLen int) (marshal.Marshallable, *syserr.Error) {
	// TODO(b/124056281): Stop rejecting short optLen values in getsockopt.
	switch name {
	case linux.SO_ERROR:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		// Get the last error and convert it.
		err := ep.SocketOptions().GetLastError()
		if err == nil {
			optP := primitive.Int32(0)
			return &optP, nil
		}

		optP := primitive.Int32(syserr.TranslateNetstackError(err).ToLinux())
		return &optP, nil

	case linux.SO_PEERCRED:
		// Only accepted when the caller-supplied family is AF_UNIX.
		if family != linux.AF_UNIX || outLen < unix.SizeofUcred {
			return nil, syserr.ErrInvalidArgument
		}

		// The credentials reported are those of the calling task t.
		tcred := t.Credentials()
		creds := linux.ControlMessageCredentials{
			PID: int32(t.ThreadGroup().ID()),
			UID: uint32(tcred.EffectiveKUID.In(tcred.UserNamespace).OrOverflow()),
			GID: uint32(tcred.EffectiveKGID.In(tcred.UserNamespace).OrOverflow()),
		}
		return &creds, nil

	case linux.SO_PASSCRED:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetPassCred()))
		return &v, nil

	case linux.SO_SNDBUF:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		size := ep.SocketOptions().GetSendBufferSize()

		// Clamp so that the value fits in the 32-bit result.
		if size > math.MaxInt32 {
			size = math.MaxInt32
		}

		sizeP := primitive.Int32(size)
		return &sizeP, nil

	case linux.SO_RCVBUF:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		size := ep.SocketOptions().GetReceiveBufferSize()

		// Clamp so that the value fits in the 32-bit result.
		if size > math.MaxInt32 {
			size = math.MaxInt32
		}

		sizeP := primitive.Int32(size)
		return &sizeP, nil

	case linux.SO_REUSEADDR:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReuseAddress()))
		return &v, nil

	case linux.SO_REUSEPORT:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReusePort()))
		return &v, nil

	case linux.SO_BINDTODEVICE:
		v := ep.SocketOptions().GetBindToDevice()
		// A zero value is reported as an empty result, before any length
		// check is made.
		if v == 0 {
			var b primitive.ByteSlice
			return &b, nil
		}
		if outLen < linux.IFNAMSIZ {
			return nil, syserr.ErrInvalidArgument
		}
		// NOTE: s and name below shadow the function parameters of the
		// same names for the remainder of this case.
		s := t.NetworkContext()
		if s == nil {
			return nil, syserr.ErrNoDevice
		}
		nic, ok := s.Interfaces()[int32(v)]
		if !ok {
			// The NICID no longer indicates a valid interface, probably because that
			// interface was removed.
			return nil, syserr.ErrUnknownDevice
		}

		// The interface name is returned with a trailing NUL byte.
		name := primitive.ByteSlice(append([]byte(nic.Name), 0))
		return &name, nil

	case linux.SO_BROADCAST:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetBroadcast()))
		return &v, nil

	case linux.SO_KEEPALIVE:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetKeepAlive()))
		return &v, nil

	case linux.SO_LINGER:
		if outLen < linux.SizeOfLinger {
			return nil, syserr.ErrInvalidArgument
		}

		var linger linux.Linger
		v := ep.SocketOptions().GetLinger()

		if v.Enabled {
			linger.OnOff = 1
		}
		// The timeout is reported in whole seconds.
		linger.Linger = int32(v.Timeout.Seconds())
		return &linger, nil

	case linux.SO_SNDTIMEO:
		// TODO(igudger): Linux allows shorter lengths for partial results.
		if outLen < linux.SizeOfTimeval {
			return nil, syserr.ErrInvalidArgument
		}

		sendTimeout := linux.NsecToTimeval(s.SendTimeout())
		return &sendTimeout, nil

	case linux.SO_RCVTIMEO:
		// TODO(igudger): Linux allows shorter lengths for partial results.
		if outLen < linux.SizeOfTimeval {
			return nil, syserr.ErrInvalidArgument
		}

		recvTimeout := linux.NsecToTimeval(s.RecvTimeout())
		return &recvTimeout, nil

	case linux.SO_OOBINLINE:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetOutOfBandInline()))
		return &v, nil

	case linux.SO_NO_CHECK:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetNoChecksum()))
		return &v, nil

	case linux.SO_ACCEPTCONN:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		// This option is only viable for TCP endpoints.
		// Non-TCP sockets therefore always report 0.
		var v bool
		if _, skType, skProto := s.Type(); isTCPSocket(skType, skProto) {
			v = tcp.EndpointState(ep.State()) == tcp.StateListen
		}
		vP := primitive.Int32(boolToInt32(v))
		return &vP, nil

	default:
		socket.GetSockOptEmitUnimplementedEvent(t, name)
	}
	return nil, syserr.ErrProtocolNotAvailable
}

// getSockOptTCP implements GetSockOpt when level is SOL_TCP.
func getSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name, outLen int) (marshal.Marshallable, *syserr.Error) {
	// SOL_TCP options are rejected outright on anything but a TCP socket.
	if _, skType, skProto := s.Type(); !isTCPSocket(skType, skProto) {
		log.Warningf("SOL_TCP options are only supported on TCP sockets: skType, skProto = %v, %d", skType, skProto)
		return nil, syserr.ErrUnknownProtocolOption
	}

	switch name {
	case linux.TCP_NODELAY:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		// TCP_NODELAY is the inverse of the endpoint's delay option.
		v := primitive.Int32(boolToInt32(!ep.SocketOptions().GetDelayOption()))
		return &v, nil

	case linux.TCP_CORK:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetCorkOption()))
		return &v, nil

	case linux.TCP_QUICKACK:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetQuickAck()))
		return &v, nil

	case linux.TCP_MAXSEG:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v, err := ep.GetSockOptInt(tcpip.MaxSegOption)
		if err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}
		vP := primitive.Int32(v)
		return &vP, nil

	case linux.TCP_KEEPIDLE:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		var v tcpip.KeepaliveIdleOption
		if err := ep.GetSockOpt(&v); err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}
		// Reported in whole seconds.
		keepAliveIdle := primitive.Int32(time.Duration(v) / time.Second)
		return &keepAliveIdle, nil

	case linux.TCP_KEEPINTVL:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		var v tcpip.KeepaliveIntervalOption
		if err := ep.GetSockOpt(&v); err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}
		// Reported in whole seconds.
		keepAliveInterval := primitive.Int32(time.Duration(v) / time.Second)
		return &keepAliveInterval, nil

	case linux.TCP_KEEPCNT:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v, err := ep.GetSockOptInt(tcpip.KeepaliveCountOption)
		if err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}
		vP := primitive.Int32(v)
		return &vP, nil

	case linux.TCP_USER_TIMEOUT:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		var v tcpip.TCPUserTimeoutOption
		if err := ep.GetSockOpt(&v); err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}
		// Reported in whole milliseconds.
		tcpUserTimeout := primitive.Int32(time.Duration(v) / time.Millisecond)
		return &tcpUserTimeout, nil

	case linux.TCP_INFO:
		var v tcpip.TCPInfoOption
		if err := ep.GetSockOpt(&v); err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}

		// TODO(b/64800844): Translate fields once they are added to
		// tcpip.TCPInfoOption.
		// Durations are converted to whole microseconds.
		info := linux.TCPInfo{
			State:       uint8(v.State),
			RTO:         uint32(v.RTO / time.Microsecond),
			RTT:         uint32(v.RTT / time.Microsecond),
			RTTVar:      uint32(v.RTTVar / time.Microsecond),
			SndSsthresh: v.SndSsthresh,
			SndCwnd:     v.SndCwnd,
		}
		// Map the netstack congestion-control state onto the Linux
		// constants; states not listed leave CaState at its zero value.
		switch v.CcState {
		case tcpip.RTORecovery:
			info.CaState = linux.TCP_CA_Loss
		case tcpip.FastRecovery, tcpip.SACKRecovery:
			info.CaState = linux.TCP_CA_Recovery
		case tcpip.Disorder:
			info.CaState = linux.TCP_CA_Disorder
		case tcpip.Open:
			info.CaState = linux.TCP_CA_Open
		}

		// In netstack reorderSeen is updated only when RACK is enabled.
		// We only track whether the reordering is seen, which is
		// different than Linux where reorderSeen is not specific to
		// RACK and is incremented when a reordering event is seen.
		if v.ReorderSeen {
			info.ReordSeen = 1
		}

		// Linux truncates the output binary to outLen.
		buf := t.CopyScratchBuffer(info.SizeBytes())
		info.MarshalUnsafe(buf)
		if len(buf) > outLen {
			buf = buf[:outLen]
		}
		bufP := primitive.ByteSlice(buf)
		return &bufP, nil

	case linux.TCP_CC_INFO,
		linux.TCP_NOTSENT_LOWAT,
		linux.TCP_ZEROCOPY_RECEIVE:

		// Recognized but unimplemented; falls out of the switch to
		// ErrProtocolNotAvailable.
		t.Kernel().EmitUnimplementedEvent(t)

	case linux.TCP_CONGESTION:
		if outLen <= 0 {
			return nil, syserr.ErrInvalidArgument
		}

		var v tcpip.CongestionControlOption
		if err := ep.GetSockOpt(&v); err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}

		// We match linux behaviour here where it returns the lower of
		// TCP_CA_NAME_MAX bytes or the value of the option length.
		//
		// This is Linux's net/tcp.h TCP_CA_NAME_MAX.
		const tcpCANameMax = 16

		toCopy := tcpCANameMax
		if outLen < tcpCANameMax {
			toCopy = outLen
		}
		// b is zero-filled, so a name shorter than toCopy is padded with
		// NUL bytes.
		b := make([]byte, toCopy)
		copy(b, v)

		bP := primitive.ByteSlice(b)
		return &bP, nil

	case linux.TCP_LINGER2:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		var v tcpip.TCPLingerTimeoutOption
		if err := ep.GetSockOpt(&v); err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}
		// Non-negative values are reported in whole seconds; any negative
		// value is reported as -1.
		var lingerTimeout primitive.Int32
		if v >= 0 {
			lingerTimeout = primitive.Int32(time.Duration(v) / time.Second)
		} else {
			lingerTimeout = -1
		}
		return &lingerTimeout, nil

	case linux.TCP_DEFER_ACCEPT:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		var v tcpip.TCPDeferAcceptOption
		if err := ep.GetSockOpt(&v); err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}

		// Reported in whole seconds.
		tcpDeferAccept := primitive.Int32(time.Duration(v) / time.Second)
		return &tcpDeferAccept, nil

	case linux.TCP_SYNCNT:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v, err := ep.GetSockOptInt(tcpip.TCPSynCountOption)
		if err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}
		vP := primitive.Int32(v)
		return &vP, nil

	case linux.TCP_WINDOW_CLAMP:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v, err := ep.GetSockOptInt(tcpip.TCPWindowClampOption)
		if err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}
		vP := primitive.Int32(v)
		return &vP, nil
	default:
		emitUnimplementedEventTCP(t, name)
	}
	return nil, syserr.ErrProtocolNotAvailable
}

// getSockOptIPv6 implements GetSockOpt when level is SOL_IPV6.
//
// The options are only available on AF_INET6 sockets whose endpoint is a
// tcpip.Endpoint; otherwise ErrUnknownProtocolOption is returned. The
// ip6tables queries are additionally restricted to SOCK_RAW sockets.
func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
	if _, ok := ep.(tcpip.Endpoint); !ok {
		log.Warningf("SOL_IPV6 options not supported on endpoints other than tcpip.Endpoint: option = %d", name)
		return nil, syserr.ErrUnknownProtocolOption
	}

	family, skType, _ := s.Type()
	if family != linux.AF_INET6 {
		return nil, syserr.ErrUnknownProtocolOption
	}

	switch name {
	case linux.IPV6_V6ONLY:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetV6Only()))
		return &v, nil

	case linux.IPV6_PATHMTU:
		// Recognized but unimplemented; falls out of the switch to
		// ErrProtocolNotAvailable.
		t.Kernel().EmitUnimplementedEvent(t)

	case linux.IPV6_TCLASS:
		// Length handling for parity with Linux.
		if outLen == 0 {
			var b primitive.ByteSlice
			return &b, nil
		}
		v, err := ep.GetSockOptInt(tcpip.IPv6TrafficClassOption)
		if err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}

		uintv := primitive.Uint32(v)
		// Linux truncates the output binary to outLen.
		ib := t.CopyScratchBuffer(uintv.SizeBytes())
		uintv.MarshalUnsafe(ib)
		// Handle cases where outLen is less than sizeOfInt32.
		if len(ib) > outLen {
			ib = ib[:outLen]
		}
		ibP := primitive.ByteSlice(ib)
		return &ibP, nil

	case linux.IPV6_RECVTCLASS:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReceiveTClass()))
		return &v, nil
	case linux.IPV6_RECVERR:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetRecvError()))
		return &v, nil

	case linux.IPV6_RECVORIGDSTADDR:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReceiveOriginalDstAddress()))
		return &v, nil

	case linux.IP6T_ORIGINAL_DST:
		if outLen < sockAddrInet6Size {
			return nil, syserr.ErrInvalidArgument
		}

		var v tcpip.OriginalDestinationOption
		if err := ep.GetSockOpt(&v); err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}

		// The address is always converted as AF_INET6; the returned
		// length is not needed.
		a, _ := socket.ConvertAddress(linux.AF_INET6, tcpip.FullAddress(v))
		return a.(*linux.SockAddrInet6), nil

	case linux.IP6T_SO_GET_INFO:
		if outLen < linux.SizeOfIPTGetinfo {
			return nil, syserr.ErrInvalidArgument
		}

		// Only valid for raw IPv6 sockets.
		if skType != linux.SOCK_RAW {
			return nil, syserr.ErrProtocolNotAvailable
		}

		stack := inet.StackFromContext(t)
		if stack == nil {
			return nil, syserr.ErrNoDevice
		}
		// NOTE(review): the assertion below panics if the context's stack
		// is not a *Stack — confirm callers guarantee this.
		info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr, true)
		if err != nil {
			return nil, err
		}
		return &info, nil

	case linux.IP6T_SO_GET_ENTRIES:
		// IPTGetEntries is reused for IPv6.
		if outLen < linux.SizeOfIPTGetEntries {
			return nil, syserr.ErrInvalidArgument
		}
		// Only valid for raw IPv6 sockets.
		if skType != linux.SOCK_RAW {
			return nil, syserr.ErrProtocolNotAvailable
		}

		stack := inet.StackFromContext(t)
		if stack == nil {
			return nil, syserr.ErrNoDevice
		}
		entries, err := netfilter.GetEntries6(t, stack.(*Stack).Stack, outPtr, outLen)
		if err != nil {
			return nil, err
		}
		return &entries, nil

	case linux.IP6T_SO_GET_REVISION_TARGET:
		if outLen < linux.SizeOfXTGetRevision {
			return nil, syserr.ErrInvalidArgument
		}

		// Only valid for raw IPv6 sockets.
		if skType != linux.SOCK_RAW {
			return nil, syserr.ErrProtocolNotAvailable
		}

		// The stack is only checked for presence here; it is not passed
		// to TargetRevision.
		stack := inet.StackFromContext(t)
		if stack == nil {
			return nil, syserr.ErrNoDevice
		}
		ret, err := netfilter.TargetRevision(t, outPtr, header.IPv6ProtocolNumber)
		if err != nil {
			return nil, err
		}
		return &ret, nil

	default:
		emitUnimplementedEventIPv6(t, name)
	}
	return nil, syserr.ErrProtocolNotAvailable
}

// getSockOptIP implements GetSockOpt when level is SOL_IP.
func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr hostarch.Addr, outLen int, family int) (marshal.Marshallable, *syserr.Error) {
	// SOL_IP options require the endpoint to be a tcpip.Endpoint.
	if _, ok := ep.(tcpip.Endpoint); !ok {
		log.Warningf("SOL_IP options not supported on endpoints other than tcpip.Endpoint: option = %d", name)
		return nil, syserr.ErrUnknownProtocolOption
	}

	switch name {
	case linux.IP_TTL:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v, err := ep.GetSockOptInt(tcpip.TTLOption)
		if err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}

		// Fill in the default value, if needed.
		vP := primitive.Int32(v)
		if vP == 0 {
			vP = DefaultTTL
		}

		return &vP, nil

	case linux.IP_MULTICAST_TTL:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v, err := ep.GetSockOptInt(tcpip.MulticastTTLOption)
		if err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}

		vP := primitive.Int32(v)
		return &vP, nil

	case linux.IP_MULTICAST_IF:
		if outLen < len(linux.InetAddr{}) {
			return nil, syserr.ErrInvalidArgument
		}

		var v tcpip.MulticastInterfaceOption
		if err := ep.GetSockOpt(&v); err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}

		// Only the interface address is returned, converted through an
		// AF_INET sockaddr.
		a, _ := socket.ConvertAddress(linux.AF_INET, tcpip.FullAddress{Addr: v.InterfaceAddr})

		return &a.(*linux.SockAddrInet).Addr, nil

	case linux.IP_MULTICAST_LOOP:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetMulticastLoop()))
		return &v, nil

	case linux.IP_TOS:
		// Length handling for parity with Linux.
		if outLen == 0 {
			var b primitive.ByteSlice
			return &b, nil
		}
		v, err := ep.GetSockOptInt(tcpip.IPv4TOSOption)
		if err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}
		// A buffer shorter than an int32 receives the value as one byte.
		if outLen < sizeOfInt32 {
			vP := primitive.Uint8(v)
			return &vP, nil
		}
		vP := primitive.Int32(v)
		return &vP, nil

	case linux.IP_RECVTOS:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReceiveTOS()))
		return &v, nil

	case linux.IP_RECVERR:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetRecvError()))
		return &v, nil

	case linux.IP_PKTINFO:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReceivePacketInfo()))
		return &v, nil

	case linux.IP_HDRINCL:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetHeaderIncluded()))
		return &v, nil

	case linux.IP_RECVORIGDSTADDR:
		if outLen < sizeOfInt32 {
			return nil, syserr.ErrInvalidArgument
		}

		v := primitive.Int32(boolToInt32(ep.SocketOptions().GetReceiveOriginalDstAddress()))
		return &v, nil

	case linux.SO_ORIGINAL_DST:
		if outLen < sockAddrInetSize {
			return nil, syserr.ErrInvalidArgument
		}

		var v tcpip.OriginalDestinationOption
		if err := ep.GetSockOpt(&v); err != nil {
			return nil, syserr.TranslateNetstackError(err)
		}

		// The address is always converted as AF_INET; the returned length
		// is not needed.
		a, _ := socket.ConvertAddress(linux.AF_INET, tcpip.FullAddress(v))
		return a.(*linux.SockAddrInet), nil

	case linux.IPT_SO_GET_INFO:
		if outLen < linux.SizeOfIPTGetinfo {
			return nil, syserr.ErrInvalidArgument
		}

		// Only valid for raw IPv4 sockets.
		if family, skType, _ := s.Type(); family != linux.AF_INET || skType != linux.SOCK_RAW {
			return nil, syserr.ErrProtocolNotAvailable
		}

		stack := inet.StackFromContext(t)
		if stack == nil {
			return nil, syserr.ErrNoDevice
		}
		// NOTE(review): the assertion below panics if the context's stack
		// is not a *Stack — confirm callers guarantee this.
		info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr, false)
		if err != nil {
			return nil, err
		}
		return &info, nil

	case linux.IPT_SO_GET_ENTRIES:
		if outLen < linux.SizeOfIPTGetEntries {
			return nil, syserr.ErrInvalidArgument
		}

		// Only valid for raw IPv4 sockets.
		if family, skType, _ := s.Type(); family != linux.AF_INET || skType != linux.SOCK_RAW {
			return nil, syserr.ErrProtocolNotAvailable
		}

		stack := inet.StackFromContext(t)
		if stack == nil {
			return nil, syserr.ErrNoDevice
		}
		entries, err := netfilter.GetEntries4(t, stack.(*Stack).Stack, outPtr, outLen)
		if err != nil {
			return nil, err
		}
		return &entries, nil

	case linux.IPT_SO_GET_REVISION_TARGET:
		if outLen < linux.SizeOfXTGetRevision {
			return nil, syserr.ErrInvalidArgument
		}

		// Only valid for raw IPv4 sockets.
		if family, skType, _ := s.Type(); family != linux.AF_INET || skType != linux.SOCK_RAW {
			return nil, syserr.ErrProtocolNotAvailable
		}

		// The stack is only checked for presence here; it is not passed
		// to TargetRevision.
		stack := inet.StackFromContext(t)
		if stack == nil {
			return nil, syserr.ErrNoDevice
		}
		ret, err := netfilter.TargetRevision(t, outPtr, header.IPv4ProtocolNumber)
		if err != nil {
			return nil, err
		}
		return &ret, nil

	default:
		emitUnimplementedEventIP(t, name)
	}
	return nil, syserr.ErrProtocolNotAvailable
}

// SetSockOpt implements the linux syscall setsockopt(2) for sockets backed by
// tcpip.Endpoint.
1624 func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVal []byte) *syserr.Error { 1625 // TODO(b/78348848): Unlike other socket options, SO_TIMESTAMP is 1626 // implemented specifically for netstack.SocketOperations rather than 1627 // commonEndpoint. commonEndpoint should be extended to support socket 1628 // options where the implementation is not shared, as unix sockets need 1629 // their own support for SO_TIMESTAMP. 1630 if level == linux.SOL_SOCKET && name == linux.SO_TIMESTAMP { 1631 if len(optVal) < sizeOfInt32 { 1632 return syserr.ErrInvalidArgument 1633 } 1634 s.readMu.Lock() 1635 defer s.readMu.Unlock() 1636 s.sockOptTimestamp = hostarch.ByteOrder.Uint32(optVal) != 0 1637 return nil 1638 } 1639 if level == linux.SOL_TCP && name == linux.TCP_INQ { 1640 if len(optVal) < sizeOfInt32 { 1641 return syserr.ErrInvalidArgument 1642 } 1643 s.readMu.Lock() 1644 defer s.readMu.Unlock() 1645 s.sockOptInq = hostarch.ByteOrder.Uint32(optVal) != 0 1646 return nil 1647 } 1648 1649 return SetSockOpt(t, s, s.Endpoint, level, name, optVal) 1650 } 1651 1652 // SetSockOpt can be used to implement the linux syscall setsockopt(2) for 1653 // sockets backed by a commonEndpoint. 1654 func SetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, level int, name int, optVal []byte) *syserr.Error { 1655 switch level { 1656 case linux.SOL_SOCKET: 1657 return setSockOptSocket(t, s, ep, name, optVal) 1658 1659 case linux.SOL_TCP: 1660 return setSockOptTCP(t, s, ep, name, optVal) 1661 1662 case linux.SOL_IPV6: 1663 return setSockOptIPv6(t, s, ep, name, optVal) 1664 1665 case linux.SOL_IP: 1666 return setSockOptIP(t, s, ep, name, optVal) 1667 1668 case linux.SOL_PACKET: 1669 // gVisor doesn't support any SOL_PACKET options just return not 1670 // supported. Returning nil here will result in tcpdump thinking AF_PACKET 1671 // features are supported and proceed to use them and break. 
1672 t.Kernel().EmitUnimplementedEvent(t) 1673 return syserr.ErrProtocolNotAvailable 1674 1675 case linux.SOL_UDP, 1676 linux.SOL_ICMPV6, 1677 linux.SOL_RAW: 1678 1679 t.Kernel().EmitUnimplementedEvent(t) 1680 } 1681 1682 return nil 1683 } 1684 1685 // setSockOptSocket implements SetSockOpt when level is SOL_SOCKET. 1686 func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, optVal []byte) *syserr.Error { 1687 switch name { 1688 case linux.SO_SNDBUF: 1689 if len(optVal) < sizeOfInt32 { 1690 return syserr.ErrInvalidArgument 1691 } 1692 1693 v := hostarch.ByteOrder.Uint32(optVal) 1694 ep.SocketOptions().SetSendBufferSize(int64(v), true /* notify */) 1695 return nil 1696 1697 case linux.SO_RCVBUF: 1698 if len(optVal) < sizeOfInt32 { 1699 return syserr.ErrInvalidArgument 1700 } 1701 1702 v := hostarch.ByteOrder.Uint32(optVal) 1703 ep.SocketOptions().SetReceiveBufferSize(int64(v), true /* notify */) 1704 return nil 1705 1706 case linux.SO_REUSEADDR: 1707 if len(optVal) < sizeOfInt32 { 1708 return syserr.ErrInvalidArgument 1709 } 1710 1711 v := hostarch.ByteOrder.Uint32(optVal) 1712 ep.SocketOptions().SetReuseAddress(v != 0) 1713 return nil 1714 1715 case linux.SO_REUSEPORT: 1716 if len(optVal) < sizeOfInt32 { 1717 return syserr.ErrInvalidArgument 1718 } 1719 1720 v := hostarch.ByteOrder.Uint32(optVal) 1721 ep.SocketOptions().SetReusePort(v != 0) 1722 return nil 1723 1724 case linux.SO_BINDTODEVICE: 1725 n := bytes.IndexByte(optVal, 0) 1726 if n == -1 { 1727 n = len(optVal) 1728 } 1729 name := string(optVal[:n]) 1730 if name == "" { 1731 return syserr.TranslateNetstackError(ep.SocketOptions().SetBindToDevice(0)) 1732 } 1733 s := t.NetworkContext() 1734 if s == nil { 1735 return syserr.ErrNoDevice 1736 } 1737 for nicID, nic := range s.Interfaces() { 1738 if nic.Name == name { 1739 return syserr.TranslateNetstackError(ep.SocketOptions().SetBindToDevice(nicID)) 1740 } 1741 } 1742 return syserr.ErrUnknownDevice 1743 1744 case 
linux.SO_BROADCAST: 1745 if len(optVal) < sizeOfInt32 { 1746 return syserr.ErrInvalidArgument 1747 } 1748 1749 v := hostarch.ByteOrder.Uint32(optVal) 1750 ep.SocketOptions().SetBroadcast(v != 0) 1751 return nil 1752 1753 case linux.SO_PASSCRED: 1754 if len(optVal) < sizeOfInt32 { 1755 return syserr.ErrInvalidArgument 1756 } 1757 1758 v := hostarch.ByteOrder.Uint32(optVal) 1759 ep.SocketOptions().SetPassCred(v != 0) 1760 return nil 1761 1762 case linux.SO_KEEPALIVE: 1763 if len(optVal) < sizeOfInt32 { 1764 return syserr.ErrInvalidArgument 1765 } 1766 1767 v := hostarch.ByteOrder.Uint32(optVal) 1768 ep.SocketOptions().SetKeepAlive(v != 0) 1769 return nil 1770 1771 case linux.SO_SNDTIMEO: 1772 if len(optVal) < linux.SizeOfTimeval { 1773 return syserr.ErrInvalidArgument 1774 } 1775 1776 var v linux.Timeval 1777 v.UnmarshalBytes(optVal[:linux.SizeOfTimeval]) 1778 if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) { 1779 return syserr.ErrDomain 1780 } 1781 s.SetSendTimeout(v.ToNsecCapped()) 1782 return nil 1783 1784 case linux.SO_RCVTIMEO: 1785 if len(optVal) < linux.SizeOfTimeval { 1786 return syserr.ErrInvalidArgument 1787 } 1788 1789 var v linux.Timeval 1790 v.UnmarshalBytes(optVal[:linux.SizeOfTimeval]) 1791 if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) { 1792 return syserr.ErrDomain 1793 } 1794 s.SetRecvTimeout(v.ToNsecCapped()) 1795 return nil 1796 1797 case linux.SO_OOBINLINE: 1798 if len(optVal) < sizeOfInt32 { 1799 return syserr.ErrInvalidArgument 1800 } 1801 1802 v := hostarch.ByteOrder.Uint32(optVal) 1803 ep.SocketOptions().SetOutOfBandInline(v != 0) 1804 return nil 1805 1806 case linux.SO_NO_CHECK: 1807 if len(optVal) < sizeOfInt32 { 1808 return syserr.ErrInvalidArgument 1809 } 1810 1811 v := hostarch.ByteOrder.Uint32(optVal) 1812 ep.SocketOptions().SetNoChecksum(v != 0) 1813 return nil 1814 1815 case linux.SO_LINGER: 1816 if len(optVal) < linux.SizeOfLinger { 1817 return syserr.ErrInvalidArgument 1818 } 1819 1820 var v 
linux.Linger 1821 v.UnmarshalBytes(optVal[:linux.SizeOfLinger]) 1822 1823 if v != (linux.Linger{}) { 1824 socket.SetSockOptEmitUnimplementedEvent(t, name) 1825 } 1826 1827 ep.SocketOptions().SetLinger(tcpip.LingerOption{ 1828 Enabled: v.OnOff != 0, 1829 Timeout: time.Second * time.Duration(v.Linger), 1830 }) 1831 return nil 1832 1833 case linux.SO_DETACH_FILTER: 1834 // optval is ignored. 1835 var v tcpip.SocketDetachFilterOption 1836 return syserr.TranslateNetstackError(ep.SetSockOpt(&v)) 1837 1838 default: 1839 socket.SetSockOptEmitUnimplementedEvent(t, name) 1840 } 1841 1842 return nil 1843 } 1844 1845 // setSockOptTCP implements SetSockOpt when level is SOL_TCP. 1846 func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, optVal []byte) *syserr.Error { 1847 if _, skType, skProto := s.Type(); !isTCPSocket(skType, skProto) { 1848 log.Warningf("SOL_TCP options are only supported on TCP sockets: skType, skProto = %v, %d", skType, skProto) 1849 return syserr.ErrUnknownProtocolOption 1850 } 1851 1852 switch name { 1853 case linux.TCP_NODELAY: 1854 if len(optVal) < sizeOfInt32 { 1855 return syserr.ErrInvalidArgument 1856 } 1857 1858 v := hostarch.ByteOrder.Uint32(optVal) 1859 ep.SocketOptions().SetDelayOption(v == 0) 1860 return nil 1861 1862 case linux.TCP_CORK: 1863 if len(optVal) < sizeOfInt32 { 1864 return syserr.ErrInvalidArgument 1865 } 1866 1867 v := hostarch.ByteOrder.Uint32(optVal) 1868 ep.SocketOptions().SetCorkOption(v != 0) 1869 return nil 1870 1871 case linux.TCP_QUICKACK: 1872 if len(optVal) < sizeOfInt32 { 1873 return syserr.ErrInvalidArgument 1874 } 1875 1876 v := hostarch.ByteOrder.Uint32(optVal) 1877 ep.SocketOptions().SetQuickAck(v != 0) 1878 return nil 1879 1880 case linux.TCP_MAXSEG: 1881 if len(optVal) < sizeOfInt32 { 1882 return syserr.ErrInvalidArgument 1883 } 1884 1885 v := hostarch.ByteOrder.Uint32(optVal) 1886 return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.MaxSegOption, int(v))) 1887 1888 case 
linux.TCP_KEEPIDLE: 1889 if len(optVal) < sizeOfInt32 { 1890 return syserr.ErrInvalidArgument 1891 } 1892 1893 v := hostarch.ByteOrder.Uint32(optVal) 1894 if v < 1 || v > linux.MAX_TCP_KEEPIDLE { 1895 return syserr.ErrInvalidArgument 1896 } 1897 opt := tcpip.KeepaliveIdleOption(time.Second * time.Duration(v)) 1898 return syserr.TranslateNetstackError(ep.SetSockOpt(&opt)) 1899 1900 case linux.TCP_KEEPINTVL: 1901 if len(optVal) < sizeOfInt32 { 1902 return syserr.ErrInvalidArgument 1903 } 1904 1905 v := hostarch.ByteOrder.Uint32(optVal) 1906 if v < 1 || v > linux.MAX_TCP_KEEPINTVL { 1907 return syserr.ErrInvalidArgument 1908 } 1909 opt := tcpip.KeepaliveIntervalOption(time.Second * time.Duration(v)) 1910 return syserr.TranslateNetstackError(ep.SetSockOpt(&opt)) 1911 1912 case linux.TCP_KEEPCNT: 1913 if len(optVal) < sizeOfInt32 { 1914 return syserr.ErrInvalidArgument 1915 } 1916 1917 v := hostarch.ByteOrder.Uint32(optVal) 1918 if v < 1 || v > linux.MAX_TCP_KEEPCNT { 1919 return syserr.ErrInvalidArgument 1920 } 1921 return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.KeepaliveCountOption, int(v))) 1922 1923 case linux.TCP_USER_TIMEOUT: 1924 if len(optVal) < sizeOfInt32 { 1925 return syserr.ErrInvalidArgument 1926 } 1927 1928 v := int32(hostarch.ByteOrder.Uint32(optVal)) 1929 if v < 0 { 1930 return syserr.ErrInvalidArgument 1931 } 1932 opt := tcpip.TCPUserTimeoutOption(time.Millisecond * time.Duration(v)) 1933 return syserr.TranslateNetstackError(ep.SetSockOpt(&opt)) 1934 1935 case linux.TCP_CONGESTION: 1936 v := tcpip.CongestionControlOption(optVal) 1937 if err := ep.SetSockOpt(&v); err != nil { 1938 return syserr.TranslateNetstackError(err) 1939 } 1940 return nil 1941 1942 case linux.TCP_LINGER2: 1943 if len(optVal) < sizeOfInt32 { 1944 return syserr.ErrInvalidArgument 1945 } 1946 1947 v := int32(hostarch.ByteOrder.Uint32(optVal)) 1948 opt := tcpip.TCPLingerTimeoutOption(time.Second * time.Duration(v)) 1949 return 
syserr.TranslateNetstackError(ep.SetSockOpt(&opt)) 1950 1951 case linux.TCP_DEFER_ACCEPT: 1952 if len(optVal) < sizeOfInt32 { 1953 return syserr.ErrInvalidArgument 1954 } 1955 v := int32(hostarch.ByteOrder.Uint32(optVal)) 1956 if v < 0 { 1957 v = 0 1958 } 1959 opt := tcpip.TCPDeferAcceptOption(time.Second * time.Duration(v)) 1960 return syserr.TranslateNetstackError(ep.SetSockOpt(&opt)) 1961 1962 case linux.TCP_SYNCNT: 1963 if len(optVal) < sizeOfInt32 { 1964 return syserr.ErrInvalidArgument 1965 } 1966 v := hostarch.ByteOrder.Uint32(optVal) 1967 1968 return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPSynCountOption, int(v))) 1969 1970 case linux.TCP_WINDOW_CLAMP: 1971 if len(optVal) < sizeOfInt32 { 1972 return syserr.ErrInvalidArgument 1973 } 1974 v := hostarch.ByteOrder.Uint32(optVal) 1975 1976 return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPWindowClampOption, int(v))) 1977 1978 case linux.TCP_REPAIR_OPTIONS: 1979 t.Kernel().EmitUnimplementedEvent(t) 1980 1981 default: 1982 emitUnimplementedEventTCP(t, name) 1983 } 1984 1985 return nil 1986 } 1987 1988 // setSockOptIPv6 implements SetSockOpt when level is SOL_IPV6. 
1989 func setSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, optVal []byte) *syserr.Error { 1990 if _, ok := ep.(tcpip.Endpoint); !ok { 1991 log.Warningf("SOL_IPV6 options not supported on endpoints other than tcpip.Endpoint: option = %d", name) 1992 return syserr.ErrUnknownProtocolOption 1993 } 1994 1995 family, skType, skProto := s.Type() 1996 if family != linux.AF_INET6 { 1997 return syserr.ErrUnknownProtocolOption 1998 } 1999 2000 switch name { 2001 case linux.IPV6_V6ONLY: 2002 if len(optVal) < sizeOfInt32 { 2003 return syserr.ErrInvalidArgument 2004 } 2005 2006 if isTCPSocket(skType, skProto) && tcp.EndpointState(ep.State()) != tcp.StateInitial { 2007 return syserr.ErrInvalidEndpointState 2008 } else if isUDPSocket(skType, skProto) && udp.EndpointState(ep.State()) != udp.StateInitial { 2009 return syserr.ErrInvalidEndpointState 2010 } 2011 2012 v := hostarch.ByteOrder.Uint32(optVal) 2013 ep.SocketOptions().SetV6Only(v != 0) 2014 return nil 2015 2016 case linux.IPV6_ADD_MEMBERSHIP: 2017 req, err := copyInMulticastV6Request(optVal) 2018 if err != nil { 2019 return err 2020 } 2021 2022 return syserr.TranslateNetstackError(ep.SetSockOpt(&tcpip.AddMembershipOption{ 2023 NIC: tcpip.NICID(req.InterfaceIndex), 2024 MulticastAddr: tcpip.Address(req.MulticastAddr[:]), 2025 })) 2026 2027 case linux.IPV6_DROP_MEMBERSHIP: 2028 req, err := copyInMulticastV6Request(optVal) 2029 if err != nil { 2030 return err 2031 } 2032 2033 return syserr.TranslateNetstackError(ep.SetSockOpt(&tcpip.RemoveMembershipOption{ 2034 NIC: tcpip.NICID(req.InterfaceIndex), 2035 MulticastAddr: tcpip.Address(req.MulticastAddr[:]), 2036 })) 2037 2038 case linux.IPV6_IPSEC_POLICY, 2039 linux.IPV6_JOIN_ANYCAST, 2040 linux.IPV6_LEAVE_ANYCAST, 2041 // TODO(b/148887420): Add support for IPV6_PKTINFO. 
2042 linux.IPV6_PKTINFO, 2043 linux.IPV6_ROUTER_ALERT, 2044 linux.IPV6_XFRM_POLICY, 2045 linux.MCAST_BLOCK_SOURCE, 2046 linux.MCAST_JOIN_GROUP, 2047 linux.MCAST_JOIN_SOURCE_GROUP, 2048 linux.MCAST_LEAVE_GROUP, 2049 linux.MCAST_LEAVE_SOURCE_GROUP, 2050 linux.MCAST_UNBLOCK_SOURCE: 2051 2052 t.Kernel().EmitUnimplementedEvent(t) 2053 2054 case linux.IPV6_RECVORIGDSTADDR: 2055 if len(optVal) < sizeOfInt32 { 2056 return syserr.ErrInvalidArgument 2057 } 2058 v := int32(hostarch.ByteOrder.Uint32(optVal)) 2059 2060 ep.SocketOptions().SetReceiveOriginalDstAddress(v != 0) 2061 return nil 2062 2063 case linux.IPV6_TCLASS: 2064 if len(optVal) < sizeOfInt32 { 2065 return syserr.ErrInvalidArgument 2066 } 2067 v := int32(hostarch.ByteOrder.Uint32(optVal)) 2068 if v < -1 || v > 255 { 2069 return syserr.ErrInvalidArgument 2070 } 2071 if v == -1 { 2072 v = 0 2073 } 2074 return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.IPv6TrafficClassOption, int(v))) 2075 2076 case linux.IPV6_RECVTCLASS: 2077 v, err := parseIntOrChar(optVal) 2078 if err != nil { 2079 return err 2080 } 2081 2082 ep.SocketOptions().SetReceiveTClass(v != 0) 2083 return nil 2084 case linux.IPV6_RECVERR: 2085 if len(optVal) == 0 { 2086 return nil 2087 } 2088 v, err := parseIntOrChar(optVal) 2089 if err != nil { 2090 return err 2091 } 2092 ep.SocketOptions().SetRecvError(v != 0) 2093 return nil 2094 2095 case linux.IP6T_SO_SET_REPLACE: 2096 if len(optVal) < linux.SizeOfIP6TReplace { 2097 return syserr.ErrInvalidArgument 2098 } 2099 2100 // Only valid for raw IPv6 sockets. 2101 if skType != linux.SOCK_RAW { 2102 return syserr.ErrProtocolNotAvailable 2103 } 2104 2105 stack := inet.StackFromContext(t) 2106 if stack == nil { 2107 return syserr.ErrNoDevice 2108 } 2109 // Stack must be a netstack stack. 
2110 return netfilter.SetEntries(t, stack.(*Stack).Stack, optVal, true) 2111 2112 case linux.IP6T_SO_SET_ADD_COUNTERS: 2113 log.Infof("IP6T_SO_SET_ADD_COUNTERS is not supported") 2114 return nil 2115 2116 default: 2117 emitUnimplementedEventIPv6(t, name) 2118 } 2119 2120 return nil 2121 } 2122 2123 var ( 2124 inetMulticastRequestSize = (*linux.InetMulticastRequest)(nil).SizeBytes() 2125 inetMulticastRequestWithNICSize = (*linux.InetMulticastRequestWithNIC)(nil).SizeBytes() 2126 inet6MulticastRequestSize = (*linux.Inet6MulticastRequest)(nil).SizeBytes() 2127 ) 2128 2129 // copyInMulticastRequest copies in a variable-size multicast request. The 2130 // kernel determines which structure was passed by its length. IP_MULTICAST_IF 2131 // supports ip_mreqn, ip_mreq and in_addr, while IP_ADD_MEMBERSHIP and 2132 // IP_DROP_MEMBERSHIP only support ip_mreqn and ip_mreq. To handle this, 2133 // allowAddr controls whether in_addr is accepted or rejected. 2134 func copyInMulticastRequest(optVal []byte, allowAddr bool) (linux.InetMulticastRequestWithNIC, *syserr.Error) { 2135 if len(optVal) < len(linux.InetAddr{}) { 2136 return linux.InetMulticastRequestWithNIC{}, syserr.ErrInvalidArgument 2137 } 2138 2139 if len(optVal) < inetMulticastRequestSize { 2140 if !allowAddr { 2141 return linux.InetMulticastRequestWithNIC{}, syserr.ErrInvalidArgument 2142 } 2143 2144 var req linux.InetMulticastRequestWithNIC 2145 copy(req.InterfaceAddr[:], optVal) 2146 return req, nil 2147 } 2148 2149 if len(optVal) >= inetMulticastRequestWithNICSize { 2150 var req linux.InetMulticastRequestWithNIC 2151 req.UnmarshalUnsafe(optVal[:inetMulticastRequestWithNICSize]) 2152 return req, nil 2153 } 2154 2155 var req linux.InetMulticastRequestWithNIC 2156 req.InetMulticastRequest.UnmarshalUnsafe(optVal[:inetMulticastRequestSize]) 2157 return req, nil 2158 } 2159 2160 func copyInMulticastV6Request(optVal []byte) (linux.Inet6MulticastRequest, *syserr.Error) { 2161 if len(optVal) < inet6MulticastRequestSize { 
2162 return linux.Inet6MulticastRequest{}, syserr.ErrInvalidArgument 2163 } 2164 2165 var req linux.Inet6MulticastRequest 2166 req.UnmarshalUnsafe(optVal[:inet6MulticastRequestSize]) 2167 return req, nil 2168 } 2169 2170 // parseIntOrChar copies either a 32-bit int or an 8-bit uint out of buf. 2171 // 2172 // net/ipv4/ip_sockglue.c:do_ip_setsockopt does this for its socket options. 2173 func parseIntOrChar(buf []byte) (int32, *syserr.Error) { 2174 if len(buf) == 0 { 2175 return 0, syserr.ErrInvalidArgument 2176 } 2177 2178 if len(buf) >= sizeOfInt32 { 2179 return int32(hostarch.ByteOrder.Uint32(buf)), nil 2180 } 2181 2182 return int32(buf[0]), nil 2183 } 2184 2185 // setSockOptIP implements SetSockOpt when level is SOL_IP. 2186 func setSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, optVal []byte) *syserr.Error { 2187 if _, ok := ep.(tcpip.Endpoint); !ok { 2188 log.Warningf("SOL_IP options not supported on endpoints other than tcpip.Endpoint: option = %d", name) 2189 return syserr.ErrUnknownProtocolOption 2190 } 2191 2192 switch name { 2193 case linux.IP_MULTICAST_TTL: 2194 v, err := parseIntOrChar(optVal) 2195 if err != nil { 2196 return err 2197 } 2198 2199 if v == -1 { 2200 // Linux translates -1 to 1. 2201 v = 1 2202 } 2203 if v < 0 || v > 255 { 2204 return syserr.ErrInvalidArgument 2205 } 2206 return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.MulticastTTLOption, int(v))) 2207 2208 case linux.IP_ADD_MEMBERSHIP: 2209 req, err := copyInMulticastRequest(optVal, false /* allowAddr */) 2210 if err != nil { 2211 return err 2212 } 2213 2214 return syserr.TranslateNetstackError(ep.SetSockOpt(&tcpip.AddMembershipOption{ 2215 NIC: tcpip.NICID(req.InterfaceIndex), 2216 // TODO(igudger): Change AddMembership to use the standard 2217 // any address representation. 
2218 InterfaceAddr: tcpip.Address(req.InterfaceAddr[:]), 2219 MulticastAddr: tcpip.Address(req.MulticastAddr[:]), 2220 })) 2221 2222 case linux.IP_DROP_MEMBERSHIP: 2223 req, err := copyInMulticastRequest(optVal, false /* allowAddr */) 2224 if err != nil { 2225 return err 2226 } 2227 2228 return syserr.TranslateNetstackError(ep.SetSockOpt(&tcpip.RemoveMembershipOption{ 2229 NIC: tcpip.NICID(req.InterfaceIndex), 2230 // TODO(igudger): Change DropMembership to use the standard 2231 // any address representation. 2232 InterfaceAddr: tcpip.Address(req.InterfaceAddr[:]), 2233 MulticastAddr: tcpip.Address(req.MulticastAddr[:]), 2234 })) 2235 2236 case linux.IP_MULTICAST_IF: 2237 req, err := copyInMulticastRequest(optVal, true /* allowAddr */) 2238 if err != nil { 2239 return err 2240 } 2241 2242 return syserr.TranslateNetstackError(ep.SetSockOpt(&tcpip.MulticastInterfaceOption{ 2243 NIC: tcpip.NICID(req.InterfaceIndex), 2244 InterfaceAddr: socket.BytesToIPAddress(req.InterfaceAddr[:]), 2245 })) 2246 2247 case linux.IP_MULTICAST_LOOP: 2248 v, err := parseIntOrChar(optVal) 2249 if err != nil { 2250 return err 2251 } 2252 2253 ep.SocketOptions().SetMulticastLoop(v != 0) 2254 return nil 2255 2256 case linux.MCAST_JOIN_GROUP: 2257 // FIXME(b/124219304): Implement MCAST_JOIN_GROUP. 2258 t.Kernel().EmitUnimplementedEvent(t) 2259 return syserr.ErrInvalidArgument 2260 2261 case linux.IP_TTL: 2262 v, err := parseIntOrChar(optVal) 2263 if err != nil { 2264 return err 2265 } 2266 2267 // -1 means default TTL. 
2268 if v == -1 { 2269 v = 0 2270 } else if v < 1 || v > 255 { 2271 return syserr.ErrInvalidArgument 2272 } 2273 return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TTLOption, int(v))) 2274 2275 case linux.IP_TOS: 2276 if len(optVal) == 0 { 2277 return nil 2278 } 2279 v, err := parseIntOrChar(optVal) 2280 if err != nil { 2281 return err 2282 } 2283 return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.IPv4TOSOption, int(v))) 2284 2285 case linux.IP_RECVTOS: 2286 v, err := parseIntOrChar(optVal) 2287 if err != nil { 2288 return err 2289 } 2290 ep.SocketOptions().SetReceiveTOS(v != 0) 2291 return nil 2292 2293 case linux.IP_RECVERR: 2294 if len(optVal) == 0 { 2295 return nil 2296 } 2297 v, err := parseIntOrChar(optVal) 2298 if err != nil { 2299 return err 2300 } 2301 ep.SocketOptions().SetRecvError(v != 0) 2302 return nil 2303 2304 case linux.IP_PKTINFO: 2305 if len(optVal) == 0 { 2306 return nil 2307 } 2308 v, err := parseIntOrChar(optVal) 2309 if err != nil { 2310 return err 2311 } 2312 ep.SocketOptions().SetReceivePacketInfo(v != 0) 2313 return nil 2314 2315 case linux.IP_HDRINCL: 2316 if len(optVal) == 0 { 2317 return nil 2318 } 2319 v, err := parseIntOrChar(optVal) 2320 if err != nil { 2321 return err 2322 } 2323 ep.SocketOptions().SetHeaderIncluded(v != 0) 2324 return nil 2325 2326 case linux.IP_RECVORIGDSTADDR: 2327 if len(optVal) == 0 { 2328 return nil 2329 } 2330 v, err := parseIntOrChar(optVal) 2331 if err != nil { 2332 return err 2333 } 2334 2335 ep.SocketOptions().SetReceiveOriginalDstAddress(v != 0) 2336 return nil 2337 2338 case linux.IPT_SO_SET_REPLACE: 2339 if len(optVal) < linux.SizeOfIPTReplace { 2340 return syserr.ErrInvalidArgument 2341 } 2342 2343 // Only valid for raw IPv4 sockets. 
2344 if family, skType, _ := s.Type(); family != linux.AF_INET || skType != linux.SOCK_RAW { 2345 return syserr.ErrProtocolNotAvailable 2346 } 2347 2348 stack := inet.StackFromContext(t) 2349 if stack == nil { 2350 return syserr.ErrNoDevice 2351 } 2352 // Stack must be a netstack stack. 2353 return netfilter.SetEntries(t, stack.(*Stack).Stack, optVal, false) 2354 2355 case linux.IPT_SO_SET_ADD_COUNTERS: 2356 log.Infof("IPT_SO_SET_ADD_COUNTERS is not supported") 2357 return nil 2358 2359 case linux.IP_ADD_SOURCE_MEMBERSHIP, 2360 linux.IP_BIND_ADDRESS_NO_PORT, 2361 linux.IP_BLOCK_SOURCE, 2362 linux.IP_CHECKSUM, 2363 linux.IP_DROP_SOURCE_MEMBERSHIP, 2364 linux.IP_FREEBIND, 2365 linux.IP_IPSEC_POLICY, 2366 linux.IP_MINTTL, 2367 linux.IP_MSFILTER, 2368 linux.IP_MTU_DISCOVER, 2369 linux.IP_MULTICAST_ALL, 2370 linux.IP_NODEFRAG, 2371 linux.IP_OPTIONS, 2372 linux.IP_PASSSEC, 2373 linux.IP_RECVFRAGSIZE, 2374 linux.IP_RECVOPTS, 2375 linux.IP_RECVTTL, 2376 linux.IP_RETOPTS, 2377 linux.IP_TRANSPARENT, 2378 linux.IP_UNBLOCK_SOURCE, 2379 linux.IP_UNICAST_IF, 2380 linux.IP_XFRM_POLICY, 2381 linux.MCAST_BLOCK_SOURCE, 2382 linux.MCAST_JOIN_SOURCE_GROUP, 2383 linux.MCAST_LEAVE_GROUP, 2384 linux.MCAST_LEAVE_SOURCE_GROUP, 2385 linux.MCAST_MSFILTER, 2386 linux.MCAST_UNBLOCK_SOURCE: 2387 2388 t.Kernel().EmitUnimplementedEvent(t) 2389 } 2390 2391 return nil 2392 } 2393 2394 // emitUnimplementedEventTCP emits unimplemented event if name is valid. This 2395 // function contains names that are common between Get and SetSockOpt when 2396 // level is SOL_TCP. 
2397 func emitUnimplementedEventTCP(t *kernel.Task, name int) { 2398 switch name { 2399 case linux.TCP_CONGESTION, 2400 linux.TCP_CORK, 2401 linux.TCP_FASTOPEN, 2402 linux.TCP_FASTOPEN_CONNECT, 2403 linux.TCP_FASTOPEN_KEY, 2404 linux.TCP_FASTOPEN_NO_COOKIE, 2405 linux.TCP_QUEUE_SEQ, 2406 linux.TCP_REPAIR, 2407 linux.TCP_REPAIR_QUEUE, 2408 linux.TCP_REPAIR_WINDOW, 2409 linux.TCP_SAVED_SYN, 2410 linux.TCP_SAVE_SYN, 2411 linux.TCP_THIN_DUPACK, 2412 linux.TCP_THIN_LINEAR_TIMEOUTS, 2413 linux.TCP_TIMESTAMP, 2414 linux.TCP_ULP: 2415 2416 t.Kernel().EmitUnimplementedEvent(t) 2417 } 2418 } 2419 2420 // emitUnimplementedEventIPv6 emits unimplemented event if name is valid. It 2421 // contains names that are common between Get and SetSockOpt when level is 2422 // SOL_IPV6. 2423 func emitUnimplementedEventIPv6(t *kernel.Task, name int) { 2424 switch name { 2425 case linux.IPV6_2292DSTOPTS, 2426 linux.IPV6_2292HOPLIMIT, 2427 linux.IPV6_2292HOPOPTS, 2428 linux.IPV6_2292PKTINFO, 2429 linux.IPV6_2292PKTOPTIONS, 2430 linux.IPV6_2292RTHDR, 2431 linux.IPV6_ADDR_PREFERENCES, 2432 linux.IPV6_AUTOFLOWLABEL, 2433 linux.IPV6_DONTFRAG, 2434 linux.IPV6_DSTOPTS, 2435 linux.IPV6_FLOWINFO, 2436 linux.IPV6_FLOWINFO_SEND, 2437 linux.IPV6_FLOWLABEL_MGR, 2438 linux.IPV6_FREEBIND, 2439 linux.IPV6_HOPOPTS, 2440 linux.IPV6_MINHOPCOUNT, 2441 linux.IPV6_MTU, 2442 linux.IPV6_MTU_DISCOVER, 2443 linux.IPV6_MULTICAST_ALL, 2444 linux.IPV6_MULTICAST_HOPS, 2445 linux.IPV6_MULTICAST_IF, 2446 linux.IPV6_MULTICAST_LOOP, 2447 linux.IPV6_RECVDSTOPTS, 2448 linux.IPV6_RECVFRAGSIZE, 2449 linux.IPV6_RECVHOPLIMIT, 2450 linux.IPV6_RECVHOPOPTS, 2451 linux.IPV6_RECVPATHMTU, 2452 linux.IPV6_RECVPKTINFO, 2453 linux.IPV6_RECVRTHDR, 2454 linux.IPV6_RTHDR, 2455 linux.IPV6_RTHDRDSTOPTS, 2456 linux.IPV6_TCLASS, 2457 linux.IPV6_TRANSPARENT, 2458 linux.IPV6_UNICAST_HOPS, 2459 linux.IPV6_UNICAST_IF, 2460 linux.MCAST_MSFILTER, 2461 linux.IPV6_ADDRFORM: 2462 2463 t.Kernel().EmitUnimplementedEvent(t) 2464 } 2465 } 2466 2467 // 
emitUnimplementedEventIP emits unimplemented event if name is valid. It 2468 // contains names that are common between Get and SetSockOpt when level is 2469 // SOL_IP. 2470 func emitUnimplementedEventIP(t *kernel.Task, name int) { 2471 switch name { 2472 case linux.IP_TOS, 2473 linux.IP_TTL, 2474 linux.IP_OPTIONS, 2475 linux.IP_ROUTER_ALERT, 2476 linux.IP_RECVOPTS, 2477 linux.IP_RETOPTS, 2478 linux.IP_PKTINFO, 2479 linux.IP_PKTOPTIONS, 2480 linux.IP_MTU_DISCOVER, 2481 linux.IP_RECVTTL, 2482 linux.IP_RECVTOS, 2483 linux.IP_MTU, 2484 linux.IP_FREEBIND, 2485 linux.IP_IPSEC_POLICY, 2486 linux.IP_XFRM_POLICY, 2487 linux.IP_PASSSEC, 2488 linux.IP_TRANSPARENT, 2489 linux.IP_ORIGDSTADDR, 2490 linux.IP_MINTTL, 2491 linux.IP_NODEFRAG, 2492 linux.IP_CHECKSUM, 2493 linux.IP_BIND_ADDRESS_NO_PORT, 2494 linux.IP_RECVFRAGSIZE, 2495 linux.IP_MULTICAST_IF, 2496 linux.IP_MULTICAST_TTL, 2497 linux.IP_MULTICAST_LOOP, 2498 linux.IP_ADD_MEMBERSHIP, 2499 linux.IP_DROP_MEMBERSHIP, 2500 linux.IP_UNBLOCK_SOURCE, 2501 linux.IP_BLOCK_SOURCE, 2502 linux.IP_ADD_SOURCE_MEMBERSHIP, 2503 linux.IP_DROP_SOURCE_MEMBERSHIP, 2504 linux.IP_MSFILTER, 2505 linux.MCAST_JOIN_GROUP, 2506 linux.MCAST_BLOCK_SOURCE, 2507 linux.MCAST_UNBLOCK_SOURCE, 2508 linux.MCAST_LEAVE_GROUP, 2509 linux.MCAST_JOIN_SOURCE_GROUP, 2510 linux.MCAST_LEAVE_SOURCE_GROUP, 2511 linux.MCAST_MSFILTER, 2512 linux.IP_MULTICAST_ALL, 2513 linux.IP_UNICAST_IF: 2514 2515 t.Kernel().EmitUnimplementedEvent(t) 2516 } 2517 } 2518 2519 // GetSockName implements the linux syscall getsockname(2) for sockets backed by 2520 // tcpip.Endpoint. 
2521 func (s *socketOpsCommon) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) { 2522 addr, err := s.Endpoint.GetLocalAddress() 2523 if err != nil { 2524 return nil, 0, syserr.TranslateNetstackError(err) 2525 } 2526 2527 a, l := socket.ConvertAddress(s.family, addr) 2528 return a, l, nil 2529 } 2530 2531 // GetPeerName implements the linux syscall getpeername(2) for sockets backed by 2532 // tcpip.Endpoint. 2533 func (s *socketOpsCommon) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) { 2534 addr, err := s.Endpoint.GetRemoteAddress() 2535 if err != nil { 2536 return nil, 0, syserr.TranslateNetstackError(err) 2537 } 2538 2539 a, l := socket.ConvertAddress(s.family, addr) 2540 return a, l, nil 2541 } 2542 2543 func (s *socketOpsCommon) fillCmsgInq(cmsg *socket.ControlMessages) { 2544 if !s.sockOptInq { 2545 return 2546 } 2547 rcvBufUsed, err := s.Endpoint.GetSockOptInt(tcpip.ReceiveQueueSizeOption) 2548 if err != nil { 2549 return 2550 } 2551 cmsg.IP.HasInq = true 2552 cmsg.IP.Inq = int32(rcvBufUsed) 2553 } 2554 2555 func toLinuxPacketType(pktType tcpip.PacketType) uint8 { 2556 switch pktType { 2557 case tcpip.PacketHost: 2558 return linux.PACKET_HOST 2559 case tcpip.PacketOtherHost: 2560 return linux.PACKET_OTHERHOST 2561 case tcpip.PacketOutgoing: 2562 return linux.PACKET_OUTGOING 2563 case tcpip.PacketBroadcast: 2564 return linux.PACKET_BROADCAST 2565 case tcpip.PacketMulticast: 2566 return linux.PACKET_MULTICAST 2567 default: 2568 panic(fmt.Sprintf("unknown packet type: %d", pktType)) 2569 } 2570 } 2571 2572 // nonBlockingRead issues a non-blocking read. 2573 // 2574 // TODO(b/78348848): Support timestamps for stream sockets. 
2575 func (s *socketOpsCommon) nonBlockingRead(ctx context.Context, dst usermem.IOSequence, peek, trunc, senderRequested bool) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) { 2576 isPacket := s.isPacketBased() 2577 2578 readOptions := tcpip.ReadOptions{ 2579 Peek: peek, 2580 NeedRemoteAddr: senderRequested, 2581 NeedLinkPacketInfo: isPacket, 2582 } 2583 2584 // TCP sockets discard the data if MSG_TRUNC is set. 2585 // 2586 // This behavior is documented in man 7 tcp: 2587 // Since version 2.4, Linux supports the use of MSG_TRUNC in the flags 2588 // argument of recv(2) (and recvmsg(2)). This flag causes the received 2589 // bytes of data to be discarded, rather than passed back in a 2590 // caller-supplied buffer. 2591 var w io.Writer 2592 if !isPacket && trunc { 2593 w = &tcpip.LimitedWriter{ 2594 W: ioutil.Discard, 2595 N: dst.NumBytes(), 2596 } 2597 } else { 2598 w = dst.Writer(ctx) 2599 } 2600 2601 s.readMu.Lock() 2602 defer s.readMu.Unlock() 2603 2604 res, err := s.Endpoint.Read(w, readOptions) 2605 if _, ok := err.(*tcpip.ErrBadBuffer); ok && dst.NumBytes() == 0 { 2606 err = nil 2607 } 2608 if err != nil { 2609 return 0, 0, nil, 0, socket.ControlMessages{}, syserr.TranslateNetstackError(err) 2610 } 2611 // Set the control message, even if 0 bytes were read. 
2612 s.updateTimestamp(res.ControlMessages) 2613 2614 if isPacket { 2615 var addr linux.SockAddr 2616 var addrLen uint32 2617 if senderRequested { 2618 addr, addrLen = socket.ConvertAddress(s.family, res.RemoteAddr) 2619 switch v := addr.(type) { 2620 case *linux.SockAddrLink: 2621 v.Protocol = socket.Htons(uint16(res.LinkPacketInfo.Protocol)) 2622 v.PacketType = toLinuxPacketType(res.LinkPacketInfo.PktType) 2623 } 2624 } 2625 2626 msgLen := res.Count 2627 if trunc { 2628 msgLen = res.Total 2629 } 2630 2631 var flags int 2632 if res.Total > res.Count { 2633 flags |= linux.MSG_TRUNC 2634 } 2635 2636 return msgLen, flags, addr, addrLen, s.controlMessages(res.ControlMessages), nil 2637 } 2638 2639 if peek { 2640 // MSG_TRUNC with MSG_PEEK on a TCP socket returns the 2641 // amount that could be read, and does not write to buffer. 2642 if trunc { 2643 // TCP endpoint does not return the total bytes in buffer as numTotal. 2644 // We need to query it from socket option. 2645 rql, err := s.Endpoint.GetSockOptInt(tcpip.ReceiveQueueSizeOption) 2646 if err != nil { 2647 return 0, 0, nil, 0, socket.ControlMessages{}, syserr.TranslateNetstackError(err) 2648 } 2649 msgLen := int(dst.NumBytes()) 2650 if msgLen > rql { 2651 msgLen = rql 2652 } 2653 return msgLen, 0, nil, 0, socket.ControlMessages{}, nil 2654 } 2655 } else if n := res.Count; n != 0 { 2656 s.Endpoint.ModerateRecvBuf(n) 2657 } 2658 2659 cmsg := s.controlMessages(res.ControlMessages) 2660 s.fillCmsgInq(&cmsg) 2661 return res.Count, 0, nil, 0, cmsg, syserr.TranslateNetstackError(err) 2662 } 2663 2664 func (s *socketOpsCommon) controlMessages(cm tcpip.ControlMessages) socket.ControlMessages { 2665 readCM := socket.NewIPControlMessages(s.family, cm) 2666 return socket.ControlMessages{ 2667 IP: socket.IPControlMessages{ 2668 HasTimestamp: readCM.HasTimestamp && s.sockOptTimestamp, 2669 Timestamp: readCM.Timestamp, 2670 HasInq: readCM.HasInq, 2671 Inq: readCM.Inq, 2672 HasTOS: readCM.HasTOS, 2673 TOS: readCM.TOS, 2674 
HasTClass: readCM.HasTClass, 2675 TClass: readCM.TClass, 2676 HasIPPacketInfo: readCM.HasIPPacketInfo, 2677 PacketInfo: readCM.PacketInfo, 2678 OriginalDstAddress: readCM.OriginalDstAddress, 2679 SockErr: readCM.SockErr, 2680 }, 2681 } 2682 } 2683 2684 // updateTimestamp sets the timestamp for SIOCGSTAMP. It should be called after 2685 // successfully writing packet data out to userspace. 2686 // 2687 // Precondition: s.readMu must be locked. 2688 func (s *socketOpsCommon) updateTimestamp(cm tcpip.ControlMessages) { 2689 // Save the SIOCGSTAMP timestamp only if SO_TIMESTAMP is disabled. 2690 if !s.sockOptTimestamp { 2691 s.timestampValid = true 2692 s.timestampNS = cm.Timestamp 2693 } 2694 } 2695 2696 // dequeueErr is analogous to net/core/skbuff.c:sock_dequeue_err_skb(). 2697 func (s *socketOpsCommon) dequeueErr() *tcpip.SockError { 2698 so := s.Endpoint.SocketOptions() 2699 err := so.DequeueErr() 2700 if err == nil { 2701 return nil 2702 } 2703 2704 // Update socket error to reflect ICMP errors in queue. 2705 if nextErr := so.PeekErr(); nextErr != nil && nextErr.Cause.Origin().IsICMPErr() { 2706 so.SetLastError(nextErr.Err) 2707 } else if err.Cause.Origin().IsICMPErr() { 2708 so.SetLastError(nil) 2709 } 2710 return err 2711 } 2712 2713 // addrFamilyFromNetProto returns the address family identifier for the given 2714 // network protocol. 2715 func addrFamilyFromNetProto(net tcpip.NetworkProtocolNumber) int { 2716 switch net { 2717 case header.IPv4ProtocolNumber: 2718 return linux.AF_INET 2719 case header.IPv6ProtocolNumber: 2720 return linux.AF_INET6 2721 default: 2722 panic(fmt.Sprintf("invalid net proto for addr family inference: %d", net)) 2723 } 2724 } 2725 2726 // recvErr handles MSG_ERRQUEUE for recvmsg(2). 2727 // This is analogous to net/ipv4/ip_sockglue.c:ip_recv_error(). 
2728 func (s *socketOpsCommon) recvErr(t *kernel.Task, dst usermem.IOSequence) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) { 2729 sockErr := s.dequeueErr() 2730 if sockErr == nil { 2731 return 0, 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain 2732 } 2733 2734 // The payload of the original packet that caused the error is passed as 2735 // normal data via msg_iovec. -- recvmsg(2) 2736 msgFlags := linux.MSG_ERRQUEUE 2737 if int(dst.NumBytes()) < len(sockErr.Payload) { 2738 msgFlags |= linux.MSG_TRUNC 2739 } 2740 n, err := dst.CopyOut(t, sockErr.Payload) 2741 2742 // The original destination address of the datagram that caused the error is 2743 // supplied via msg_name. -- recvmsg(2) 2744 dstAddr, dstAddrLen := socket.ConvertAddress(addrFamilyFromNetProto(sockErr.NetProto), sockErr.Dst) 2745 cmgs := socket.ControlMessages{IP: socket.NewIPControlMessages(s.family, tcpip.ControlMessages{SockErr: sockErr})} 2746 return n, msgFlags, dstAddr, dstAddrLen, cmgs, syserr.FromError(err) 2747 } 2748 2749 // RecvMsg implements the linux syscall recvmsg(2) for sockets backed by 2750 // tcpip.Endpoint. 2751 func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (n int, msgFlags int, senderAddr linux.SockAddr, senderAddrLen uint32, controlMessages socket.ControlMessages, err *syserr.Error) { 2752 if flags&linux.MSG_ERRQUEUE != 0 { 2753 return s.recvErr(t, dst) 2754 } 2755 2756 trunc := flags&linux.MSG_TRUNC != 0 2757 peek := flags&linux.MSG_PEEK != 0 2758 dontWait := flags&linux.MSG_DONTWAIT != 0 2759 waitAll := flags&linux.MSG_WAITALL != 0 2760 if senderRequested && !s.isPacketBased() { 2761 // Stream sockets ignore the sender address. 
2762 senderRequested = false 2763 } 2764 n, msgFlags, senderAddr, senderAddrLen, controlMessages, err = s.nonBlockingRead(t, dst, peek, trunc, senderRequested) 2765 2766 if s.isPacketBased() && err == syserr.ErrClosedForReceive && flags&linux.MSG_DONTWAIT != 0 { 2767 // In this situation we should return EAGAIN. 2768 return 0, 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain 2769 } 2770 2771 if err != nil && (err != syserr.ErrWouldBlock || dontWait) { 2772 // Read failed and we should not retry. 2773 return 0, 0, nil, 0, socket.ControlMessages{}, err 2774 } 2775 2776 if err == nil && (dontWait || !waitAll || s.isPacketBased() || int64(n) >= dst.NumBytes()) { 2777 // We got all the data we need. 2778 return 2779 } 2780 2781 // Don't overwrite any data we received. 2782 dst = dst.DropFirst(n) 2783 2784 // We'll have to block. Register for notifications and keep trying to 2785 // send all the data. 2786 e, ch := waiter.NewChannelEntry(nil) 2787 s.EventRegister(&e, waiter.ReadableEvents) 2788 defer s.EventUnregister(&e) 2789 2790 for { 2791 var rn int 2792 rn, msgFlags, senderAddr, senderAddrLen, controlMessages, err = s.nonBlockingRead(t, dst, peek, trunc, senderRequested) 2793 n += rn 2794 if err != nil && err != syserr.ErrWouldBlock { 2795 // Always stop on errors other than would block as we generally 2796 // won't be able to get any more data. Eat the error if we got 2797 // any data. 2798 if n > 0 { 2799 err = nil 2800 } 2801 return 2802 } 2803 if err == nil && (s.isPacketBased() || !waitAll || int64(rn) >= dst.NumBytes()) { 2804 // We got all the data we need. 
2805 return 2806 } 2807 dst = dst.DropFirst(rn) 2808 2809 if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { 2810 if n > 0 { 2811 return n, msgFlags, senderAddr, senderAddrLen, controlMessages, nil 2812 } 2813 if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { 2814 return 0, 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain 2815 } 2816 return 0, 0, nil, 0, socket.ControlMessages{}, syserr.FromError(err) 2817 } 2818 } 2819 } 2820 2821 // SendMsg implements the linux syscall sendmsg(2) for sockets backed by 2822 // tcpip.Endpoint. 2823 func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) { 2824 // Reject Unix control messages. 2825 if !controlMessages.Unix.Empty() { 2826 return 0, syserr.ErrInvalidArgument 2827 } 2828 2829 var addr *tcpip.FullAddress 2830 if len(to) > 0 { 2831 addrBuf, family, err := socket.AddressAndFamily(to) 2832 if err != nil { 2833 return 0, err 2834 } 2835 if err := s.checkFamily(family, false /* exact */); err != nil { 2836 return 0, err 2837 } 2838 addrBuf = s.mapFamily(addrBuf, family) 2839 2840 addr = &addrBuf 2841 } 2842 2843 opts := tcpip.WriteOptions{ 2844 To: addr, 2845 More: flags&linux.MSG_MORE != 0, 2846 EndOfRecord: flags&linux.MSG_EOR != 0, 2847 } 2848 2849 r := src.Reader(t) 2850 var ( 2851 total int64 2852 entry waiter.Entry 2853 ch <-chan struct{} 2854 ) 2855 for { 2856 n, err := s.Endpoint.Write(r, opts) 2857 total += n 2858 if flags&linux.MSG_DONTWAIT != 0 { 2859 return int(total), syserr.TranslateNetstackError(err) 2860 } 2861 block := true 2862 switch err.(type) { 2863 case nil: 2864 block = total != src.NumBytes() 2865 case *tcpip.ErrWouldBlock: 2866 default: 2867 block = false 2868 } 2869 if block { 2870 if ch == nil { 2871 // We'll have to block. Register for notification and keep trying to 2872 // send all the data. 
2873 entry, ch = waiter.NewChannelEntry(nil) 2874 s.EventRegister(&entry, waiter.WritableEvents) 2875 defer s.EventUnregister(&entry) 2876 } else { 2877 // Don't wait immediately after registration in case more data 2878 // became available between when we last checked and when we setup 2879 // the notification. 2880 if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { 2881 if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { 2882 return int(total), syserr.ErrTryAgain 2883 } 2884 // handleIOError will consume errors from t.Block if needed. 2885 return int(total), syserr.FromError(err) 2886 } 2887 } 2888 continue 2889 } 2890 return int(total), syserr.TranslateNetstackError(err) 2891 } 2892 } 2893 2894 // Ioctl implements fs.FileOperations.Ioctl. 2895 func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { 2896 return s.socketOpsCommon.ioctl(ctx, io, args) 2897 } 2898 2899 func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { 2900 t := kernel.TaskFromContext(ctx) 2901 if t == nil { 2902 panic("ioctl(2) may only be called from a task goroutine") 2903 } 2904 2905 // SIOCGSTAMP is implemented by netstack rather than all commonEndpoint 2906 // sockets. 2907 // TODO(b/78348848): Add a commonEndpoint method to support SIOCGSTAMP. 2908 switch args[1].Int() { 2909 case linux.SIOCGSTAMP: 2910 s.readMu.Lock() 2911 defer s.readMu.Unlock() 2912 if !s.timestampValid { 2913 return 0, syserror.ENOENT 2914 } 2915 2916 tv := linux.NsecToTimeval(s.timestampNS) 2917 _, err := tv.CopyOut(t, args[2].Pointer()) 2918 return 0, err 2919 2920 case linux.TIOCINQ: 2921 v, terr := s.Endpoint.GetSockOptInt(tcpip.ReceiveQueueSizeOption) 2922 if terr != nil { 2923 return 0, syserr.TranslateNetstackError(terr).ToError() 2924 } 2925 2926 if v > math.MaxInt32 { 2927 v = math.MaxInt32 2928 } 2929 2930 // Copy result to userspace. 
2931 vP := primitive.Int32(v) 2932 _, err := vP.CopyOut(t, args[2].Pointer()) 2933 return 0, err 2934 } 2935 2936 return Ioctl(ctx, s.Endpoint, io, args) 2937 } 2938 2939 // Ioctl performs a socket ioctl. 2940 func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { 2941 t := kernel.TaskFromContext(ctx) 2942 if t == nil { 2943 panic("ioctl(2) may only be called from a task goroutine") 2944 } 2945 2946 switch arg := int(args[1].Int()); arg { 2947 case linux.SIOCGIFFLAGS, 2948 linux.SIOCGIFADDR, 2949 linux.SIOCGIFBRDADDR, 2950 linux.SIOCGIFDSTADDR, 2951 linux.SIOCGIFHWADDR, 2952 linux.SIOCGIFINDEX, 2953 linux.SIOCGIFMAP, 2954 linux.SIOCGIFMETRIC, 2955 linux.SIOCGIFMTU, 2956 linux.SIOCGIFNAME, 2957 linux.SIOCGIFNETMASK, 2958 linux.SIOCGIFTXQLEN, 2959 linux.SIOCETHTOOL: 2960 2961 var ifr linux.IFReq 2962 if _, err := ifr.CopyIn(t, args[2].Pointer()); err != nil { 2963 return 0, err 2964 } 2965 if err := interfaceIoctl(ctx, io, arg, &ifr); err != nil { 2966 return 0, err.ToError() 2967 } 2968 _, err := ifr.CopyOut(t, args[2].Pointer()) 2969 return 0, err 2970 2971 case linux.SIOCGIFCONF: 2972 // Return a list of interface addresses or the buffer size 2973 // necessary to hold the list. 2974 var ifc linux.IFConf 2975 if _, err := ifc.CopyIn(t, args[2].Pointer()); err != nil { 2976 return 0, err 2977 } 2978 2979 if err := ifconfIoctl(ctx, t, io, &ifc); err != nil { 2980 return 0, err 2981 } 2982 2983 _, err := ifc.CopyOut(t, args[2].Pointer()) 2984 return 0, err 2985 2986 case linux.TIOCINQ: 2987 v, terr := ep.GetSockOptInt(tcpip.ReceiveQueueSizeOption) 2988 if terr != nil { 2989 return 0, syserr.TranslateNetstackError(terr).ToError() 2990 } 2991 2992 if v > math.MaxInt32 { 2993 v = math.MaxInt32 2994 } 2995 // Copy result to userspace. 
2996 vP := primitive.Int32(v) 2997 _, err := vP.CopyOut(t, args[2].Pointer()) 2998 return 0, err 2999 3000 case linux.TIOCOUTQ: 3001 v, terr := ep.GetSockOptInt(tcpip.SendQueueSizeOption) 3002 if terr != nil { 3003 return 0, syserr.TranslateNetstackError(terr).ToError() 3004 } 3005 3006 if v > math.MaxInt32 { 3007 v = math.MaxInt32 3008 } 3009 3010 // Copy result to userspace. 3011 vP := primitive.Int32(v) 3012 _, err := vP.CopyOut(t, args[2].Pointer()) 3013 return 0, err 3014 3015 case linux.SIOCGIFMEM, linux.SIOCGIFPFLAGS, linux.SIOCGMIIPHY, linux.SIOCGMIIREG: 3016 unimpl.EmitUnimplementedEvent(ctx) 3017 } 3018 3019 return 0, syserror.ENOTTY 3020 } 3021 3022 // interfaceIoctl implements interface requests. 3023 func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFReq) *syserr.Error { 3024 var ( 3025 iface inet.Interface 3026 index int32 3027 found bool 3028 ) 3029 3030 // Find the relevant device. 3031 stack := inet.StackFromContext(ctx) 3032 if stack == nil { 3033 return syserr.ErrNoDevice 3034 } 3035 3036 // SIOCGIFNAME uses ifr.ifr_ifindex rather than ifr.ifr_name to 3037 // identify a device. 3038 if arg == linux.SIOCGIFNAME { 3039 // Gets the name of the interface given the interface index 3040 // stored in ifr_ifindex. 3041 index = int32(hostarch.ByteOrder.Uint32(ifr.Data[:4])) 3042 if iface, ok := stack.Interfaces()[index]; ok { 3043 ifr.SetName(iface.Name) 3044 return nil 3045 } 3046 return syserr.ErrNoDevice 3047 } 3048 3049 // Find the relevant device. 3050 for index, iface = range stack.Interfaces() { 3051 if iface.Name == ifr.Name() { 3052 found = true 3053 break 3054 } 3055 } 3056 if !found { 3057 return syserr.ErrNoDevice 3058 } 3059 3060 switch arg { 3061 case linux.SIOCGIFINDEX: 3062 // Copy out the index to the data. 3063 hostarch.ByteOrder.PutUint32(ifr.Data[:], uint32(index)) 3064 3065 case linux.SIOCGIFHWADDR: 3066 // Copy the hardware address out. 
3067 // 3068 // Refer: https://linux.die.net/man/7/netdevice 3069 // SIOCGIFHWADDR, SIOCSIFHWADDR 3070 // 3071 // Get or set the hardware address of a device using 3072 // ifr_hwaddr. The hardware address is specified in a struct 3073 // sockaddr. sa_family contains the ARPHRD_* device type, 3074 // sa_data the L2 hardware address starting from byte 0. Setting 3075 // the hardware address is a privileged operation. 3076 hostarch.ByteOrder.PutUint16(ifr.Data[:], iface.DeviceType) 3077 n := copy(ifr.Data[2:], iface.Addr) 3078 for i := 2 + n; i < len(ifr.Data); i++ { 3079 ifr.Data[i] = 0 // Clear padding. 3080 } 3081 3082 case linux.SIOCGIFFLAGS: 3083 f, err := interfaceStatusFlags(stack, iface.Name) 3084 if err != nil { 3085 return err 3086 } 3087 // Drop the flags that don't fit in the size that we need to return. This 3088 // matches Linux behavior. 3089 hostarch.ByteOrder.PutUint16(ifr.Data[:2], uint16(f)) 3090 3091 case linux.SIOCGIFADDR: 3092 // Copy the IPv4 address out. 3093 for _, addr := range stack.InterfaceAddrs()[index] { 3094 // This ioctl is only compatible with AF_INET addresses. 3095 if addr.Family != linux.AF_INET { 3096 continue 3097 } 3098 copy(ifr.Data[4:8], addr.Addr) 3099 break 3100 } 3101 3102 case linux.SIOCGIFMETRIC: 3103 // Gets the metric of the device. As per netdevice(7), this 3104 // always just sets ifr_metric to 0. 3105 hostarch.ByteOrder.PutUint32(ifr.Data[:4], 0) 3106 3107 case linux.SIOCGIFMTU: 3108 // Gets the MTU of the device. 3109 hostarch.ByteOrder.PutUint32(ifr.Data[:4], iface.MTU) 3110 3111 case linux.SIOCGIFMAP: 3112 // Gets the hardware parameters of the device. 3113 // TODO(github.com/SagerNet/issue/505): Implement. 3114 3115 case linux.SIOCGIFTXQLEN: 3116 // Gets the transmit queue length of the device. 3117 // TODO(github.com/SagerNet/issue/505): Implement. 3118 3119 case linux.SIOCGIFDSTADDR: 3120 // Gets the destination address of a point-to-point device. 3121 // TODO(github.com/SagerNet/issue/505): Implement. 
3122 3123 case linux.SIOCGIFBRDADDR: 3124 // Gets the broadcast address of a device. 3125 // TODO(github.com/SagerNet/issue/505): Implement. 3126 3127 case linux.SIOCGIFNETMASK: 3128 // Gets the network mask of a device. 3129 for _, addr := range stack.InterfaceAddrs()[index] { 3130 // This ioctl is only compatible with AF_INET addresses. 3131 if addr.Family != linux.AF_INET { 3132 continue 3133 } 3134 // Populate ifr.ifr_netmask (type sockaddr). 3135 hostarch.ByteOrder.PutUint16(ifr.Data[0:], uint16(linux.AF_INET)) 3136 hostarch.ByteOrder.PutUint16(ifr.Data[2:], 0) 3137 var mask uint32 = 0xffffffff << (32 - addr.PrefixLen) 3138 // Netmask is expected to be returned as a big endian 3139 // value. 3140 binary.BigEndian.PutUint32(ifr.Data[4:8], mask) 3141 break 3142 } 3143 3144 case linux.SIOCETHTOOL: 3145 // Stubbed out for now, Ideally we should implement the required 3146 // sub-commands for ETHTOOL 3147 // 3148 // See: 3149 // https://github.com/torvalds/linux/blob/aa0c9086b40c17a7ad94425b3b70dd1fdd7497bf/net/core/dev_ioctl.c 3150 return syserr.ErrEndpointOperation 3151 3152 default: 3153 // Not a valid call. 3154 return syserr.ErrInvalidArgument 3155 } 3156 3157 return nil 3158 } 3159 3160 // ifconfIoctl populates a struct ifconf for the SIOCGIFCONF ioctl. 3161 func ifconfIoctl(ctx context.Context, t *kernel.Task, io usermem.IO, ifc *linux.IFConf) error { 3162 // If Ptr is NULL, return the necessary buffer size via Len. 3163 // Otherwise, write up to Len bytes starting at Ptr containing ifreq 3164 // structs. 
3165 stack := inet.StackFromContext(ctx) 3166 if stack == nil { 3167 return syserr.ErrNoDevice.ToError() 3168 } 3169 3170 if ifc.Ptr == 0 { 3171 ifc.Len = int32(len(stack.Interfaces())) * int32(linux.SizeOfIFReq) 3172 return nil 3173 } 3174 3175 max := ifc.Len 3176 ifc.Len = 0 3177 for key, ifaceAddrs := range stack.InterfaceAddrs() { 3178 iface := stack.Interfaces()[key] 3179 for _, ifaceAddr := range ifaceAddrs { 3180 // Don't write past the end of the buffer. 3181 if ifc.Len+int32(linux.SizeOfIFReq) > max { 3182 break 3183 } 3184 if ifaceAddr.Family != linux.AF_INET { 3185 continue 3186 } 3187 3188 // Populate ifr.ifr_addr. 3189 ifr := linux.IFReq{} 3190 ifr.SetName(iface.Name) 3191 hostarch.ByteOrder.PutUint16(ifr.Data[0:2], uint16(ifaceAddr.Family)) 3192 hostarch.ByteOrder.PutUint16(ifr.Data[2:4], 0) 3193 copy(ifr.Data[4:8], ifaceAddr.Addr[:4]) 3194 3195 // Copy the ifr to userspace. 3196 dst := uintptr(ifc.Ptr) + uintptr(ifc.Len) 3197 ifc.Len += int32(linux.SizeOfIFReq) 3198 if _, err := ifr.CopyOut(t, hostarch.Addr(dst)); err != nil { 3199 return err 3200 } 3201 } 3202 } 3203 return nil 3204 } 3205 3206 // interfaceStatusFlags returns status flags for an interface in the stack. 3207 // Flag values and meanings are described in greater detail in netdevice(7) in 3208 // the SIOCGIFFLAGS section. 3209 func interfaceStatusFlags(stack inet.Stack, name string) (uint32, *syserr.Error) { 3210 // We should only ever be passed a netstack.Stack. 3211 epstack, ok := stack.(*Stack) 3212 if !ok { 3213 return 0, errStackType 3214 } 3215 3216 // Find the NIC corresponding to this interface. 
3217 for _, info := range epstack.Stack.NICInfo() { 3218 if info.Name == name { 3219 return nicStateFlagsToLinux(info.Flags), nil 3220 } 3221 } 3222 return 0, syserr.ErrNoDevice 3223 } 3224 3225 func nicStateFlagsToLinux(f stack.NICStateFlags) uint32 { 3226 var rv uint32 3227 if f.Up { 3228 rv |= linux.IFF_UP | linux.IFF_LOWER_UP 3229 } 3230 if f.Running { 3231 rv |= linux.IFF_RUNNING 3232 } 3233 if f.Promiscuous { 3234 rv |= linux.IFF_PROMISC 3235 } 3236 if f.Loopback { 3237 rv |= linux.IFF_LOOPBACK 3238 } 3239 return rv 3240 } 3241 3242 func isTCPSocket(skType linux.SockType, skProto int) bool { 3243 return skType == linux.SOCK_STREAM && (skProto == 0 || skProto == unix.IPPROTO_TCP) 3244 } 3245 3246 func isUDPSocket(skType linux.SockType, skProto int) bool { 3247 return skType == linux.SOCK_DGRAM && (skProto == 0 || skProto == unix.IPPROTO_UDP) 3248 } 3249 3250 func isICMPSocket(skType linux.SockType, skProto int) bool { 3251 return skType == linux.SOCK_DGRAM && (skProto == unix.IPPROTO_ICMP || skProto == unix.IPPROTO_ICMPV6) 3252 } 3253 3254 // State implements socket.Socket.State. State translates the internal state 3255 // returned by netstack to values defined by Linux. 3256 func (s *socketOpsCommon) State() uint32 { 3257 if s.family != linux.AF_INET && s.family != linux.AF_INET6 { 3258 // States not implemented for this socket's family. 3259 return 0 3260 } 3261 3262 switch { 3263 case isTCPSocket(s.skType, s.protocol): 3264 // TCP socket. 
3265 switch tcp.EndpointState(s.Endpoint.State()) { 3266 case tcp.StateEstablished: 3267 return linux.TCP_ESTABLISHED 3268 case tcp.StateSynSent: 3269 return linux.TCP_SYN_SENT 3270 case tcp.StateSynRecv: 3271 return linux.TCP_SYN_RECV 3272 case tcp.StateFinWait1: 3273 return linux.TCP_FIN_WAIT1 3274 case tcp.StateFinWait2: 3275 return linux.TCP_FIN_WAIT2 3276 case tcp.StateTimeWait: 3277 return linux.TCP_TIME_WAIT 3278 case tcp.StateClose, tcp.StateInitial, tcp.StateBound, tcp.StateConnecting, tcp.StateError: 3279 return linux.TCP_CLOSE 3280 case tcp.StateCloseWait: 3281 return linux.TCP_CLOSE_WAIT 3282 case tcp.StateLastAck: 3283 return linux.TCP_LAST_ACK 3284 case tcp.StateListen: 3285 return linux.TCP_LISTEN 3286 case tcp.StateClosing: 3287 return linux.TCP_CLOSING 3288 default: 3289 // Internal or unknown state. 3290 return 0 3291 } 3292 case isUDPSocket(s.skType, s.protocol): 3293 // UDP socket. 3294 switch udp.EndpointState(s.Endpoint.State()) { 3295 case udp.StateInitial, udp.StateBound, udp.StateClosed: 3296 return linux.TCP_CLOSE 3297 case udp.StateConnected: 3298 return linux.TCP_ESTABLISHED 3299 default: 3300 return 0 3301 } 3302 case isICMPSocket(s.skType, s.protocol): 3303 // TODO(b/112063468): Export states for ICMP sockets. 3304 case s.skType == linux.SOCK_RAW: 3305 // TODO(b/112063468): Export states for raw sockets. 3306 default: 3307 // Unknown transport protocol, how did we make this socket? 3308 log.Warningf("Unknown transport protocol for an existing socket: family=%v, type=%v, protocol=%v, internal type %v", s.family, s.skType, s.protocol, reflect.TypeOf(s.Endpoint).Elem()) 3309 return 0 3310 } 3311 3312 return 0 3313 } 3314 3315 // Type implements socket.Socket.Type. 3316 func (s *socketOpsCommon) Type() (family int, skType linux.SockType, protocol int) { 3317 return s.family, s.skType, s.protocol 3318 } 3319 3320 // LINT.ThenChange(./netstack_vfs2.go)