github.com/polevpn/netstack@v1.10.9/tcpip/transport/packet/endpoint.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package packet provides the implementation of packet sockets (see 16 // packet(7)). Packet sockets allow applications to: 17 // 18 // * manually write and inspect link, network, and transport headers 19 // * receive all traffic of a given network protocol, or all protocols 20 // 21 // Packet sockets are similar to raw sockets, but provide even more power to 22 // users, letting them effectively talk directly to the network device. 23 // 24 // Packet sockets skip the input and output iptables chains. 25 package packet 26 27 import ( 28 "sync" 29 30 "github.com/polevpn/netstack/tcpip" 31 "github.com/polevpn/netstack/tcpip/buffer" 32 "github.com/polevpn/netstack/tcpip/header" 33 "github.com/polevpn/netstack/tcpip/iptables" 34 "github.com/polevpn/netstack/tcpip/stack" 35 "github.com/polevpn/netstack/waiter" 36 ) 37 38 // +stateify savable 39 type packet struct { 40 packetEntry 41 // data holds the actual packet data, including any headers and 42 // payload. 43 data buffer.VectorisedView 44 // timestampNS is the unix time at which the packet was received. 45 timestampNS int64 46 // senderAddr is the network address of the sender. 47 senderAddr tcpip.FullAddress 48 } 49 50 // endpoint is the packet socket implementation of tcpip.Endpoint. It is legal 51 // to have goroutines make concurrent calls into the endpoint. 52 // 53 // Lock order: 54 // endpoint.mu 55 // endpoint.rcvMu 56 // 57 // +stateify savable 58 type endpoint struct { 59 stack.TransportEndpointInfo 60 // The following fields are initialized at creation time and are 61 // immutable. 62 stack *stack.Stack 63 netProto tcpip.NetworkProtocolNumber 64 waiterQueue *waiter.Queue 65 cooked bool 66 67 // The following fields are used to manage the receive queue and are 68 // protected by rcvMu. 69 rcvMu sync.Mutex 70 rcvList packetList 71 rcvBufSizeMax int 72 rcvBufSize int 73 rcvClosed bool 74 75 // The following fields are protected by mu. 76 mu sync.RWMutex 77 sndBufSize int 78 closed bool 79 stats tcpip.TransportEndpointStats 80 } 81 82 // NewEndpoint returns a new packet endpoint. 83 func NewEndpoint(s *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { 84 ep := &endpoint{ 85 stack: s, 86 TransportEndpointInfo: stack.TransportEndpointInfo{ 87 NetProto: netProto, 88 }, 89 cooked: cooked, 90 netProto: netProto, 91 waiterQueue: waiterQueue, 92 rcvBufSizeMax: 32 * 1024, 93 sndBufSize: 32 * 1024, 94 } 95 96 if err := s.RegisterPacketEndpoint(0, netProto, ep); err != nil { 97 return nil, err 98 } 99 return ep, nil 100 } 101 102 // Close implements tcpip.Endpoint.Close. 103 func (ep *endpoint) Close() { 104 ep.mu.Lock() 105 defer ep.mu.Unlock() 106 107 if ep.closed { 108 return 109 } 110 111 ep.stack.UnregisterPacketEndpoint(0, ep.netProto, ep) 112 113 ep.rcvMu.Lock() 114 defer ep.rcvMu.Unlock() 115 116 // Clear the receive list. 117 ep.rcvClosed = true 118 ep.rcvBufSize = 0 119 for !ep.rcvList.Empty() { 120 ep.rcvList.Remove(ep.rcvList.Front()) 121 } 122 123 ep.closed = true 124 ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) 125 } 126 127 // ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. 128 func (ep *endpoint) ModerateRecvBuf(copied int) {} 129 130 // IPTables implements tcpip.Endpoint.IPTables. 131 func (ep *endpoint) IPTables() (iptables.IPTables, error) { 132 return ep.stack.IPTables(), nil 133 } 134 135 // Read implements tcpip.Endpoint.Read. 136 func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { 137 ep.rcvMu.Lock() 138 139 // If there's no data to read, return that read would block or that the 140 // endpoint is closed. 141 if ep.rcvList.Empty() { 142 err := tcpip.ErrWouldBlock 143 if ep.rcvClosed { 144 ep.stats.ReadErrors.ReadClosed.Increment() 145 err = tcpip.ErrClosedForReceive 146 } 147 ep.rcvMu.Unlock() 148 return buffer.View{}, tcpip.ControlMessages{}, err 149 } 150 151 packet := ep.rcvList.Front() 152 ep.rcvList.Remove(packet) 153 ep.rcvBufSize -= packet.data.Size() 154 155 ep.rcvMu.Unlock() 156 157 if addr != nil { 158 *addr = packet.senderAddr 159 } 160 161 return packet.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: packet.timestampNS}, nil 162 } 163 164 func (ep *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) { 165 // TODO(b/129292371): Implement. 166 return 0, nil, tcpip.ErrInvalidOptionValue 167 } 168 169 // Peek implements tcpip.Endpoint.Peek. 170 func (ep *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) { 171 return 0, tcpip.ControlMessages{}, nil 172 } 173 174 // Disconnect implements tcpip.Endpoint.Disconnect. Packet sockets cannot be 175 // disconnected, and this function always returns tpcip.ErrNotSupported. 176 func (*endpoint) Disconnect() *tcpip.Error { 177 return tcpip.ErrNotSupported 178 } 179 180 // Connect implements tcpip.Endpoint.Connect. Packet sockets cannot be 181 // connected, and this function always returnes tcpip.ErrNotSupported. 182 func (ep *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { 183 return tcpip.ErrNotSupported 184 } 185 186 // Shutdown implements tcpip.Endpoint.Shutdown. Packet sockets cannot be used 187 // with Shutdown, and this function always returns tcpip.ErrNotSupported. 188 func (ep *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { 189 return tcpip.ErrNotSupported 190 } 191 192 // Listen implements tcpip.Endpoint.Listen. Packet sockets cannot be used with 193 // Listen, and this function always returns tcpip.ErrNotSupported. 194 func (ep *endpoint) Listen(backlog int) *tcpip.Error { 195 return tcpip.ErrNotSupported 196 } 197 198 // Accept implements tcpip.Endpoint.Accept. Packet sockets cannot be used with 199 // Accept, and this function always returns tcpip.ErrNotSupported. 200 func (ep *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { 201 return nil, nil, tcpip.ErrNotSupported 202 } 203 204 // Bind implements tcpip.Endpoint.Bind. 205 func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { 206 // TODO(gvisor.dev/issue/173): Add Bind support. 207 208 // "By default, all packets of the specified protocol type are passed 209 // to a packet socket. To get packets only from a specific interface 210 // use bind(2) specifying an address in a struct sockaddr_ll to bind 211 // the packet socket to an interface. Fields used for binding are 212 // sll_family (should be AF_PACKET), sll_protocol, and sll_ifindex." 213 // - packet(7). 214 215 return tcpip.ErrNotSupported 216 } 217 218 // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress. 219 func (ep *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { 220 return tcpip.FullAddress{}, tcpip.ErrNotSupported 221 } 222 223 // GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress. 224 func (ep *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) { 225 // Even a connected socket doesn't return a remote address. 226 return tcpip.FullAddress{}, tcpip.ErrNotConnected 227 } 228 229 // Readiness implements tcpip.Endpoint.Readiness. 230 func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { 231 // The endpoint is always writable. 232 result := waiter.EventOut & mask 233 234 // Determine whether the endpoint is readable. 235 if (mask & waiter.EventIn) != 0 { 236 ep.rcvMu.Lock() 237 if !ep.rcvList.Empty() || ep.rcvClosed { 238 result |= waiter.EventIn 239 } 240 ep.rcvMu.Unlock() 241 } 242 243 return result 244 } 245 246 // SetSockOpt implements tcpip.Endpoint.SetSockOpt. Packet sockets cannot be 247 // used with SetSockOpt, and this function always returns 248 // tcpip.ErrNotSupported. 249 func (ep *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { 250 return tcpip.ErrNotSupported 251 } 252 253 // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. 254 func (ep *endpoint) SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error { 255 return tcpip.ErrUnknownProtocolOption 256 } 257 258 // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. 259 func (ep *endpoint) GetSockOptInt(opt tcpip.SockOpt) (int, *tcpip.Error) { 260 return 0, tcpip.ErrNotSupported 261 } 262 263 // GetSockOpt implements tcpip.Endpoint.GetSockOpt. 264 func (ep *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { 265 return tcpip.ErrNotSupported 266 } 267 268 // HandlePacket implements stack.PacketEndpoint.HandlePacket. 269 func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) { 270 ep.rcvMu.Lock() 271 272 // Drop the packet if our buffer is currently full. 273 if ep.rcvClosed { 274 ep.rcvMu.Unlock() 275 ep.stack.Stats().DroppedPackets.Increment() 276 ep.stats.ReceiveErrors.ClosedReceiver.Increment() 277 return 278 } 279 280 if ep.rcvBufSize >= ep.rcvBufSizeMax { 281 ep.rcvMu.Unlock() 282 ep.stack.Stats().DroppedPackets.Increment() 283 ep.stats.ReceiveErrors.ReceiveBufferOverflow.Increment() 284 return 285 } 286 287 wasEmpty := ep.rcvBufSize == 0 288 289 // Push new packet into receive list and increment the buffer size. 290 var packet packet 291 // TODO(b/129292371): Return network protocol. 292 if len(pkt.LinkHeader) > 0 { 293 // Get info directly from the ethernet header. 294 hdr := header.Ethernet(pkt.LinkHeader) 295 packet.senderAddr = tcpip.FullAddress{ 296 NIC: nicID, 297 Addr: tcpip.Address(hdr.SourceAddress()), 298 } 299 } else { 300 // Guess the would-be ethernet header. 301 packet.senderAddr = tcpip.FullAddress{ 302 NIC: nicID, 303 Addr: tcpip.Address(localAddr), 304 } 305 } 306 307 if ep.cooked { 308 // Cooked packets can simply be queued. 309 packet.data = pkt.Data 310 } else { 311 // Raw packets need their ethernet headers prepended before 312 // queueing. 313 var linkHeader buffer.View 314 if len(pkt.LinkHeader) == 0 { 315 // We weren't provided with an actual ethernet header, 316 // so fake one. 317 ethFields := header.EthernetFields{ 318 SrcAddr: tcpip.LinkAddress([]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00}), 319 DstAddr: localAddr, 320 Type: netProto, 321 } 322 fakeHeader := make(header.Ethernet, header.EthernetMinimumSize) 323 fakeHeader.Encode(ðFields) 324 linkHeader = buffer.View(fakeHeader) 325 } else { 326 linkHeader = append(buffer.View(nil), pkt.LinkHeader...) 327 } 328 combinedVV := linkHeader.ToVectorisedView() 329 combinedVV.Append(pkt.Data) 330 packet.data = combinedVV 331 } 332 packet.timestampNS = ep.stack.NowNanoseconds() 333 334 ep.rcvList.PushBack(&packet) 335 ep.rcvBufSize += packet.data.Size() 336 337 ep.rcvMu.Unlock() 338 ep.stats.PacketsReceived.Increment() 339 // Notify waiters that there's data to be read. 340 if wasEmpty { 341 ep.waiterQueue.Notify(waiter.EventIn) 342 } 343 } 344 345 // State implements socket.Socket.State. 346 func (ep *endpoint) State() uint32 { 347 return 0 348 } 349 350 // Info returns a copy of the endpoint info. 351 func (ep *endpoint) Info() tcpip.EndpointInfo { 352 ep.mu.RLock() 353 // Make a copy of the endpoint info. 354 ret := ep.TransportEndpointInfo 355 ep.mu.RUnlock() 356 return &ret 357 } 358 359 // Stats returns a pointer to the endpoint stats. 360 func (ep *endpoint) Stats() tcpip.EndpointStats { 361 return &ep.stats 362 }