inet.af/netstack@v0.0.0-20220214151720-7585b01ddccf/tcpip/transport/packet/endpoint.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package packet provides the implementation of packet sockets (see 16 // packet(7)). Packet sockets allow applications to: 17 // 18 // * manually write and inspect link, network, and transport headers 19 // * receive all traffic of a given network protocol, or all protocols 20 // 21 // Packet sockets are similar to raw sockets, but provide even more power to 22 // users, letting them effectively talk directly to the network device. 23 // 24 // Packet sockets skip the input and output iptables chains. 25 package packet 26 27 import ( 28 "io" 29 "time" 30 31 "inet.af/netstack/sync" 32 "inet.af/netstack/tcpip" 33 "inet.af/netstack/tcpip/buffer" 34 "inet.af/netstack/tcpip/header" 35 "inet.af/netstack/tcpip/stack" 36 "inet.af/netstack/waiter" 37 ) 38 39 // +stateify savable 40 type packet struct { 41 packetEntry 42 // data holds the actual packet data, including any headers and 43 // payload. 44 data buffer.VectorisedView `state:".(buffer.VectorisedView)"` 45 receivedAt time.Time `state:".(int64)"` 46 // senderAddr is the network address of the sender. 47 senderAddr tcpip.FullAddress 48 // packetInfo holds additional information like the protocol 49 // of the packet etc. 50 packetInfo tcpip.LinkPacketInfo 51 } 52 53 // endpoint is the packet socket implementation of tcpip.Endpoint. It is legal 54 // to have goroutines make concurrent calls into the endpoint. 55 // 56 // Lock order: 57 // endpoint.mu 58 // endpoint.rcvMu 59 // 60 // +stateify savable 61 type endpoint struct { 62 tcpip.DefaultSocketOptionsHandler 63 64 // The following fields are initialized at creation time and are 65 // immutable. 66 stack *stack.Stack `state:"manual"` 67 waiterQueue *waiter.Queue 68 cooked bool 69 ops tcpip.SocketOptions 70 stats tcpip.TransportEndpointStats 71 72 // The following fields are used to manage the receive queue. 73 rcvMu sync.Mutex `state:"nosave"` 74 // +checklocks:rcvMu 75 rcvList packetList 76 // +checklocks:rcvMu 77 rcvBufSize int 78 // +checklocks:rcvMu 79 rcvClosed bool 80 // +checklocks:rcvMu 81 rcvDisabled bool 82 83 mu sync.RWMutex `state:"nosave"` 84 // +checklocks:mu 85 closed bool 86 // +checklocks:mu 87 boundNetProto tcpip.NetworkProtocolNumber 88 // +checklocks:mu 89 boundNIC tcpip.NICID 90 91 lastErrorMu sync.Mutex `state:"nosave"` 92 // +checklocks:lastErrorMu 93 lastError tcpip.Error 94 } 95 96 // NewEndpoint returns a new packet endpoint. 97 func NewEndpoint(s *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { 98 ep := &endpoint{ 99 stack: s, 100 cooked: cooked, 101 boundNetProto: netProto, 102 waiterQueue: waiterQueue, 103 } 104 ep.ops.InitHandler(ep, ep.stack, tcpip.GetStackSendBufferLimits, tcpip.GetStackReceiveBufferLimits) 105 ep.ops.SetReceiveBufferSize(32*1024, false /* notify */) 106 107 // Override with stack defaults. 108 var ss tcpip.SendBufferSizeOption 109 if err := s.Option(&ss); err == nil { 110 ep.ops.SetSendBufferSize(int64(ss.Default), false /* notify */) 111 } 112 113 var rs tcpip.ReceiveBufferSizeOption 114 if err := s.Option(&rs); err == nil { 115 ep.ops.SetReceiveBufferSize(int64(rs.Default), false /* notify */) 116 } 117 118 if err := s.RegisterPacketEndpoint(0, netProto, ep); err != nil { 119 return nil, err 120 } 121 return ep, nil 122 } 123 124 // Abort implements stack.TransportEndpoint.Abort. 125 func (ep *endpoint) Abort() { 126 ep.Close() 127 } 128 129 // Close implements tcpip.Endpoint.Close. 130 func (ep *endpoint) Close() { 131 ep.mu.Lock() 132 defer ep.mu.Unlock() 133 134 if ep.closed { 135 return 136 } 137 138 ep.stack.UnregisterPacketEndpoint(ep.boundNIC, ep.boundNetProto, ep) 139 140 ep.rcvMu.Lock() 141 defer ep.rcvMu.Unlock() 142 143 // Clear the receive list. 144 ep.rcvClosed = true 145 ep.rcvBufSize = 0 146 for !ep.rcvList.Empty() { 147 ep.rcvList.Remove(ep.rcvList.Front()) 148 } 149 150 ep.closed = true 151 ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents) 152 } 153 154 // ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. 155 func (*endpoint) ModerateRecvBuf(int) {} 156 157 // Read implements tcpip.Endpoint.Read. 158 func (ep *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, tcpip.Error) { 159 ep.rcvMu.Lock() 160 161 // If there's no data to read, return that read would block or that the 162 // endpoint is closed. 163 if ep.rcvList.Empty() { 164 var err tcpip.Error = &tcpip.ErrWouldBlock{} 165 if ep.rcvClosed { 166 ep.stats.ReadErrors.ReadClosed.Increment() 167 err = &tcpip.ErrClosedForReceive{} 168 } 169 ep.rcvMu.Unlock() 170 return tcpip.ReadResult{}, err 171 } 172 173 packet := ep.rcvList.Front() 174 if !opts.Peek { 175 ep.rcvList.Remove(packet) 176 ep.rcvBufSize -= packet.data.Size() 177 } 178 179 ep.rcvMu.Unlock() 180 181 res := tcpip.ReadResult{ 182 Total: packet.data.Size(), 183 ControlMessages: tcpip.ControlMessages{ 184 HasTimestamp: true, 185 Timestamp: packet.receivedAt, 186 }, 187 } 188 if opts.NeedRemoteAddr { 189 res.RemoteAddr = packet.senderAddr 190 } 191 if opts.NeedLinkPacketInfo { 192 res.LinkPacketInfo = packet.packetInfo 193 } 194 195 n, err := packet.data.ReadTo(dst, opts.Peek) 196 if n == 0 && err != nil { 197 return res, &tcpip.ErrBadBuffer{} 198 } 199 res.Count = n 200 return res, nil 201 } 202 203 func (ep *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { 204 if !ep.stack.PacketEndpointWriteSupported() { 205 return 0, &tcpip.ErrNotSupported{} 206 } 207 208 ep.mu.Lock() 209 closed := ep.closed 210 nicID := ep.boundNIC 211 proto := ep.boundNetProto 212 ep.mu.Unlock() 213 if closed { 214 return 0, &tcpip.ErrClosedForSend{} 215 } 216 217 var remote tcpip.LinkAddress 218 if to := opts.To; to != nil { 219 remote = tcpip.LinkAddress(to.Addr) 220 221 if n := to.NIC; n != 0 { 222 nicID = n 223 } 224 225 if p := to.Port; p != 0 { 226 proto = tcpip.NetworkProtocolNumber(p) 227 } 228 } 229 230 if nicID == 0 { 231 return 0, &tcpip.ErrInvalidOptionValue{} 232 } 233 234 // TODO(https://gvisor.dev/issue/6538): Avoid this allocation. 235 payloadBytes := make(buffer.View, p.Len()) 236 if _, err := io.ReadFull(p, payloadBytes); err != nil { 237 return 0, &tcpip.ErrBadBuffer{} 238 } 239 240 if err := func() tcpip.Error { 241 if ep.cooked { 242 return ep.stack.WritePacketToRemote(nicID, remote, proto, payloadBytes.ToVectorisedView()) 243 } 244 return ep.stack.WriteRawPacket(nicID, proto, payloadBytes.ToVectorisedView()) 245 }(); err != nil { 246 return 0, err 247 } 248 return int64(len(payloadBytes)), nil 249 } 250 251 // Disconnect implements tcpip.Endpoint.Disconnect. Packet sockets cannot be 252 // disconnected, and this function always returns tpcip.ErrNotSupported. 253 func (*endpoint) Disconnect() tcpip.Error { 254 return &tcpip.ErrNotSupported{} 255 } 256 257 // Connect implements tcpip.Endpoint.Connect. Packet sockets cannot be 258 // connected, and this function always returnes *tcpip.ErrNotSupported. 259 func (*endpoint) Connect(tcpip.FullAddress) tcpip.Error { 260 return &tcpip.ErrNotSupported{} 261 } 262 263 // Shutdown implements tcpip.Endpoint.Shutdown. Packet sockets cannot be used 264 // with Shutdown, and this function always returns *tcpip.ErrNotSupported. 265 func (*endpoint) Shutdown(tcpip.ShutdownFlags) tcpip.Error { 266 return &tcpip.ErrNotSupported{} 267 } 268 269 // Listen implements tcpip.Endpoint.Listen. Packet sockets cannot be used with 270 // Listen, and this function always returns *tcpip.ErrNotSupported. 271 func (*endpoint) Listen(int) tcpip.Error { 272 return &tcpip.ErrNotSupported{} 273 } 274 275 // Accept implements tcpip.Endpoint.Accept. Packet sockets cannot be used with 276 // Accept, and this function always returns *tcpip.ErrNotSupported. 277 func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, tcpip.Error) { 278 return nil, nil, &tcpip.ErrNotSupported{} 279 } 280 281 // Bind implements tcpip.Endpoint.Bind. 282 func (ep *endpoint) Bind(addr tcpip.FullAddress) tcpip.Error { 283 // "By default, all packets of the specified protocol type are passed 284 // to a packet socket. To get packets only from a specific interface 285 // use bind(2) specifying an address in a struct sockaddr_ll to bind 286 // the packet socket to an interface. Fields used for binding are 287 // sll_family (should be AF_PACKET), sll_protocol, and sll_ifindex." 288 // - packet(7). 289 290 ep.mu.Lock() 291 defer ep.mu.Unlock() 292 293 netProto := tcpip.NetworkProtocolNumber(addr.Port) 294 if netProto == 0 { 295 // Do not allow unbinding the network protocol. 296 netProto = ep.boundNetProto 297 } 298 299 if ep.boundNIC == addr.NIC && ep.boundNetProto == netProto { 300 // Already bound to the requested NIC and network protocol. 301 return nil 302 } 303 304 // TODO(https://gvisor.dev/issue/6618): Unregister after registering the new 305 // binding. 306 ep.stack.UnregisterPacketEndpoint(ep.boundNIC, ep.boundNetProto, ep) 307 ep.boundNIC = 0 308 ep.boundNetProto = 0 309 310 // Bind endpoint to receive packets from specific interface. 311 if err := ep.stack.RegisterPacketEndpoint(addr.NIC, netProto, ep); err != nil { 312 return err 313 } 314 315 ep.boundNIC = addr.NIC 316 ep.boundNetProto = netProto 317 return nil 318 } 319 320 // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress. 321 func (ep *endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { 322 ep.mu.RLock() 323 defer ep.mu.RUnlock() 324 325 return tcpip.FullAddress{ 326 NIC: ep.boundNIC, 327 Port: uint16(ep.boundNetProto), 328 }, nil 329 } 330 331 // GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress. 332 func (*endpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) { 333 // Even a connected socket doesn't return a remote address. 334 return tcpip.FullAddress{}, &tcpip.ErrNotConnected{} 335 } 336 337 // Readiness implements tcpip.Endpoint.Readiness. 338 func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { 339 // The endpoint is always writable. 340 result := waiter.WritableEvents & mask 341 342 // Determine whether the endpoint is readable. 343 if (mask & waiter.ReadableEvents) != 0 { 344 ep.rcvMu.Lock() 345 if !ep.rcvList.Empty() || ep.rcvClosed { 346 result |= waiter.ReadableEvents 347 } 348 ep.rcvMu.Unlock() 349 } 350 351 return result 352 } 353 354 // SetSockOpt implements tcpip.Endpoint.SetSockOpt. Packet sockets cannot be 355 // used with SetSockOpt, and this function always returns 356 // *tcpip.ErrNotSupported. 357 func (ep *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error { 358 switch opt.(type) { 359 case *tcpip.SocketDetachFilterOption: 360 return nil 361 362 default: 363 return &tcpip.ErrUnknownProtocolOption{} 364 } 365 } 366 367 // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. 368 func (*endpoint) SetSockOptInt(tcpip.SockOptInt, int) tcpip.Error { 369 return &tcpip.ErrUnknownProtocolOption{} 370 } 371 372 func (ep *endpoint) LastError() tcpip.Error { 373 ep.lastErrorMu.Lock() 374 defer ep.lastErrorMu.Unlock() 375 376 err := ep.lastError 377 ep.lastError = nil 378 return err 379 } 380 381 // UpdateLastError implements tcpip.SocketOptionsHandler.UpdateLastError. 382 func (ep *endpoint) UpdateLastError(err tcpip.Error) { 383 ep.lastErrorMu.Lock() 384 ep.lastError = err 385 ep.lastErrorMu.Unlock() 386 } 387 388 // GetSockOpt implements tcpip.Endpoint.GetSockOpt. 389 func (*endpoint) GetSockOpt(tcpip.GettableSocketOption) tcpip.Error { 390 return &tcpip.ErrNotSupported{} 391 } 392 393 // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. 394 func (ep *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) { 395 switch opt { 396 case tcpip.ReceiveQueueSizeOption: 397 v := 0 398 ep.rcvMu.Lock() 399 if !ep.rcvList.Empty() { 400 p := ep.rcvList.Front() 401 v = p.data.Size() 402 } 403 ep.rcvMu.Unlock() 404 return v, nil 405 406 default: 407 return -1, &tcpip.ErrUnknownProtocolOption{} 408 } 409 } 410 411 // HandlePacket implements stack.PacketEndpoint.HandlePacket. 412 func (ep *endpoint) HandlePacket(nicID tcpip.NICID, _ tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { 413 ep.rcvMu.Lock() 414 415 // Drop the packet if our buffer is currently full. 416 if ep.rcvClosed { 417 ep.rcvMu.Unlock() 418 ep.stack.Stats().DroppedPackets.Increment() 419 ep.stats.ReceiveErrors.ClosedReceiver.Increment() 420 return 421 } 422 423 rcvBufSize := ep.ops.GetReceiveBufferSize() 424 if ep.rcvDisabled || ep.rcvBufSize >= int(rcvBufSize) { 425 ep.rcvMu.Unlock() 426 ep.stack.Stats().DroppedPackets.Increment() 427 ep.stats.ReceiveErrors.ReceiveBufferOverflow.Increment() 428 return 429 } 430 431 wasEmpty := ep.rcvBufSize == 0 432 433 rcvdPkt := packet{ 434 packetInfo: tcpip.LinkPacketInfo{ 435 Protocol: netProto, 436 PktType: pkt.PktType, 437 }, 438 senderAddr: tcpip.FullAddress{ 439 NIC: nicID, 440 }, 441 receivedAt: ep.stack.Clock().Now(), 442 } 443 444 if !pkt.LinkHeader().View().IsEmpty() { 445 hdr := header.Ethernet(pkt.LinkHeader().View()) 446 rcvdPkt.senderAddr.Addr = tcpip.Address(hdr.SourceAddress()) 447 } 448 449 if ep.cooked { 450 // Cooked packet endpoints don't include the link-headers in received 451 // packets. 452 if v := pkt.NetworkHeader().View(); !v.IsEmpty() { 453 rcvdPkt.data.AppendView(v) 454 } 455 if v := pkt.TransportHeader().View(); !v.IsEmpty() { 456 rcvdPkt.data.AppendView(v) 457 } 458 rcvdPkt.data.Append(pkt.Data().ExtractVV()) 459 } else { 460 // Raw packet endpoints include link-headers in received packets. 461 rcvdPkt.data = buffer.NewVectorisedView(pkt.Size(), pkt.Views()) 462 } 463 464 ep.rcvList.PushBack(&rcvdPkt) 465 ep.rcvBufSize += rcvdPkt.data.Size() 466 467 ep.rcvMu.Unlock() 468 ep.stats.PacketsReceived.Increment() 469 // Notify waiters that there's data to be read. 470 if wasEmpty { 471 ep.waiterQueue.Notify(waiter.ReadableEvents) 472 } 473 } 474 475 // State implements socket.Socket.State. 476 func (*endpoint) State() uint32 { 477 return 0 478 } 479 480 // Info returns a copy of the endpoint info. 481 func (ep *endpoint) Info() tcpip.EndpointInfo { 482 ep.mu.RLock() 483 defer ep.mu.RUnlock() 484 return &stack.TransportEndpointInfo{NetProto: ep.boundNetProto} 485 } 486 487 // Stats returns a pointer to the endpoint stats. 488 func (ep *endpoint) Stats() tcpip.EndpointStats { 489 return &ep.stats 490 } 491 492 // SetOwner implements tcpip.Endpoint.SetOwner. 493 func (*endpoint) SetOwner(tcpip.PacketOwner) {} 494 495 // SocketOptions implements tcpip.Endpoint.SocketOptions. 496 func (ep *endpoint) SocketOptions() *tcpip.SocketOptions { 497 return &ep.ops 498 }