github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/tcpip/transport/packet/endpoint.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package packet provides the implementation of packet sockets (see 16 // packet(7)). Packet sockets allow applications to: 17 // 18 // - manually write and inspect link, network, and transport headers 19 // - receive all traffic of a given network protocol, or all protocols 20 // 21 // Packet sockets are similar to raw sockets, but provide even more power to 22 // users, letting them effectively talk directly to the network device. 23 // 24 // Packet sockets skip the input and output iptables chains. 25 package packet 26 27 import ( 28 "io" 29 "time" 30 31 "github.com/nicocha30/gvisor-ligolo/pkg/buffer" 32 "github.com/nicocha30/gvisor-ligolo/pkg/sync" 33 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip" 34 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/header" 35 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/stack" 36 "github.com/nicocha30/gvisor-ligolo/pkg/waiter" 37 ) 38 39 // +stateify savable 40 type packet struct { 41 packetEntry 42 // data holds the actual packet data, including any headers and payload. 43 data stack.PacketBufferPtr 44 receivedAt time.Time `state:".(int64)"` 45 // senderAddr is the network address of the sender. 46 senderAddr tcpip.FullAddress 47 // packetInfo holds additional information like the protocol 48 // of the packet etc. 49 packetInfo tcpip.LinkPacketInfo 50 } 51 52 // endpoint is the packet socket implementation of tcpip.Endpoint. It is legal 53 // to have goroutines make concurrent calls into the endpoint. 54 // 55 // Lock order: 56 // 57 // endpoint.mu 58 // endpoint.rcvMu 59 // 60 // +stateify savable 61 type endpoint struct { 62 tcpip.DefaultSocketOptionsHandler 63 64 // The following fields are initialized at creation time and are 65 // immutable. 66 stack *stack.Stack `state:"manual"` 67 waiterQueue *waiter.Queue 68 cooked bool 69 ops tcpip.SocketOptions 70 stats tcpip.TransportEndpointStats 71 72 // The following fields are used to manage the receive queue. 73 rcvMu sync.Mutex `state:"nosave"` 74 // +checklocks:rcvMu 75 rcvList packetList 76 // +checklocks:rcvMu 77 rcvBufSize int 78 // +checklocks:rcvMu 79 rcvClosed bool 80 // +checklocks:rcvMu 81 rcvDisabled bool 82 83 mu sync.RWMutex `state:"nosave"` 84 // +checklocks:mu 85 closed bool 86 // +checklocks:mu 87 boundNetProto tcpip.NetworkProtocolNumber 88 // +checklocks:mu 89 boundNIC tcpip.NICID 90 91 lastErrorMu sync.Mutex `state:"nosave"` 92 // +checklocks:lastErrorMu 93 lastError tcpip.Error 94 } 95 96 // NewEndpoint returns a new packet endpoint. 97 func NewEndpoint(s *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) { 98 ep := &endpoint{ 99 stack: s, 100 cooked: cooked, 101 boundNetProto: netProto, 102 waiterQueue: waiterQueue, 103 } 104 ep.ops.InitHandler(ep, ep.stack, tcpip.GetStackSendBufferLimits, tcpip.GetStackReceiveBufferLimits) 105 ep.ops.SetReceiveBufferSize(32*1024, false /* notify */) 106 107 // Override with stack defaults. 108 var ss tcpip.SendBufferSizeOption 109 if err := s.Option(&ss); err == nil { 110 ep.ops.SetSendBufferSize(int64(ss.Default), false /* notify */) 111 } 112 113 var rs tcpip.ReceiveBufferSizeOption 114 if err := s.Option(&rs); err == nil { 115 ep.ops.SetReceiveBufferSize(int64(rs.Default), false /* notify */) 116 } 117 118 if err := s.RegisterPacketEndpoint(0, netProto, ep); err != nil { 119 return nil, err 120 } 121 return ep, nil 122 } 123 124 // Abort implements stack.TransportEndpoint.Abort. 125 func (ep *endpoint) Abort() { 126 ep.Close() 127 } 128 129 // Close implements tcpip.Endpoint.Close. 130 func (ep *endpoint) Close() { 131 ep.mu.Lock() 132 defer ep.mu.Unlock() 133 134 if ep.closed { 135 return 136 } 137 138 ep.stack.UnregisterPacketEndpoint(ep.boundNIC, ep.boundNetProto, ep) 139 140 ep.rcvMu.Lock() 141 defer ep.rcvMu.Unlock() 142 143 // Clear the receive list. 144 ep.rcvClosed = true 145 ep.rcvBufSize = 0 146 for !ep.rcvList.Empty() { 147 p := ep.rcvList.Front() 148 ep.rcvList.Remove(p) 149 p.data.DecRef() 150 } 151 152 ep.closed = true 153 ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents) 154 } 155 156 // ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. 157 func (*endpoint) ModerateRecvBuf(int) {} 158 159 // Read implements tcpip.Endpoint.Read. 160 func (ep *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, tcpip.Error) { 161 ep.rcvMu.Lock() 162 163 // If there's no data to read, return that read would block or that the 164 // endpoint is closed. 165 if ep.rcvList.Empty() { 166 var err tcpip.Error = &tcpip.ErrWouldBlock{} 167 if ep.rcvClosed { 168 ep.stats.ReadErrors.ReadClosed.Increment() 169 err = &tcpip.ErrClosedForReceive{} 170 } 171 ep.rcvMu.Unlock() 172 return tcpip.ReadResult{}, err 173 } 174 175 packet := ep.rcvList.Front() 176 if !opts.Peek { 177 ep.rcvList.Remove(packet) 178 defer packet.data.DecRef() 179 ep.rcvBufSize -= packet.data.Size() 180 } 181 182 ep.rcvMu.Unlock() 183 184 res := tcpip.ReadResult{ 185 Total: packet.data.Size(), 186 ControlMessages: tcpip.ReceivableControlMessages{ 187 HasTimestamp: true, 188 Timestamp: packet.receivedAt, 189 }, 190 } 191 if opts.NeedRemoteAddr { 192 res.RemoteAddr = packet.senderAddr 193 } 194 if opts.NeedLinkPacketInfo { 195 res.LinkPacketInfo = packet.packetInfo 196 } 197 198 n, err := packet.data.Data().ReadTo(dst, opts.Peek) 199 if n == 0 && err != nil { 200 return res, &tcpip.ErrBadBuffer{} 201 } 202 res.Count = n 203 return res, nil 204 } 205 206 func (ep *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { 207 if !ep.stack.PacketEndpointWriteSupported() { 208 return 0, &tcpip.ErrNotSupported{} 209 } 210 211 ep.mu.Lock() 212 closed := ep.closed 213 nicID := ep.boundNIC 214 proto := ep.boundNetProto 215 ep.mu.Unlock() 216 if closed { 217 return 0, &tcpip.ErrClosedForSend{} 218 } 219 220 var remote tcpip.LinkAddress 221 if to := opts.To; to != nil { 222 remote = to.LinkAddr 223 224 if n := to.NIC; n != 0 { 225 nicID = n 226 } 227 228 if p := to.Port; p != 0 { 229 proto = tcpip.NetworkProtocolNumber(p) 230 } 231 } 232 233 if nicID == 0 { 234 return 0, &tcpip.ErrInvalidOptionValue{} 235 } 236 237 // Prevents giant buffer allocations. 238 if p.Len() > header.DatagramMaximumSize { 239 return 0, &tcpip.ErrMessageTooLong{} 240 } 241 242 var payload buffer.Buffer 243 if _, err := payload.WriteFromReader(p, int64(p.Len())); err != nil { 244 return 0, &tcpip.ErrBadBuffer{} 245 } 246 payloadSz := payload.Size() 247 248 if err := func() tcpip.Error { 249 if ep.cooked { 250 return ep.stack.WritePacketToRemote(nicID, remote, proto, payload) 251 } 252 return ep.stack.WriteRawPacket(nicID, proto, payload) 253 }(); err != nil { 254 return 0, err 255 } 256 return payloadSz, nil 257 } 258 259 // Disconnect implements tcpip.Endpoint.Disconnect. Packet sockets cannot be 260 // disconnected, and this function always returns tpcip.ErrNotSupported. 261 func (*endpoint) Disconnect() tcpip.Error { 262 return &tcpip.ErrNotSupported{} 263 } 264 265 // Connect implements tcpip.Endpoint.Connect. Packet sockets cannot be 266 // connected, and this function always returnes *tcpip.ErrNotSupported. 267 func (*endpoint) Connect(tcpip.FullAddress) tcpip.Error { 268 return &tcpip.ErrNotSupported{} 269 } 270 271 // Shutdown implements tcpip.Endpoint.Shutdown. Packet sockets cannot be used 272 // with Shutdown, and this function always returns *tcpip.ErrNotSupported. 273 func (*endpoint) Shutdown(tcpip.ShutdownFlags) tcpip.Error { 274 return &tcpip.ErrNotSupported{} 275 } 276 277 // Listen implements tcpip.Endpoint.Listen. Packet sockets cannot be used with 278 // Listen, and this function always returns *tcpip.ErrNotSupported. 279 func (*endpoint) Listen(int) tcpip.Error { 280 return &tcpip.ErrNotSupported{} 281 } 282 283 // Accept implements tcpip.Endpoint.Accept. Packet sockets cannot be used with 284 // Accept, and this function always returns *tcpip.ErrNotSupported. 285 func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, tcpip.Error) { 286 return nil, nil, &tcpip.ErrNotSupported{} 287 } 288 289 // Bind implements tcpip.Endpoint.Bind. 290 func (ep *endpoint) Bind(addr tcpip.FullAddress) tcpip.Error { 291 // "By default, all packets of the specified protocol type are passed 292 // to a packet socket. To get packets only from a specific interface 293 // use bind(2) specifying an address in a struct sockaddr_ll to bind 294 // the packet socket to an interface. Fields used for binding are 295 // sll_family (should be AF_PACKET), sll_protocol, and sll_ifindex." 296 // - packet(7). 297 298 ep.mu.Lock() 299 defer ep.mu.Unlock() 300 301 netProto := tcpip.NetworkProtocolNumber(addr.Port) 302 if netProto == 0 { 303 // Do not allow unbinding the network protocol. 304 netProto = ep.boundNetProto 305 } 306 307 if ep.boundNIC == addr.NIC && ep.boundNetProto == netProto { 308 // Already bound to the requested NIC and network protocol. 309 return nil 310 } 311 312 // TODO(https://gvisor.dev/issue/6618): Unregister after registering the new 313 // binding. 314 ep.stack.UnregisterPacketEndpoint(ep.boundNIC, ep.boundNetProto, ep) 315 ep.boundNIC = 0 316 ep.boundNetProto = 0 317 318 // Bind endpoint to receive packets from specific interface. 319 if err := ep.stack.RegisterPacketEndpoint(addr.NIC, netProto, ep); err != nil { 320 return err 321 } 322 323 ep.boundNIC = addr.NIC 324 ep.boundNetProto = netProto 325 return nil 326 } 327 328 // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress. 329 func (ep *endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { 330 ep.mu.RLock() 331 defer ep.mu.RUnlock() 332 333 return tcpip.FullAddress{ 334 NIC: ep.boundNIC, 335 Port: uint16(ep.boundNetProto), 336 }, nil 337 } 338 339 // GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress. 340 func (*endpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) { 341 // Even a connected socket doesn't return a remote address. 342 return tcpip.FullAddress{}, &tcpip.ErrNotConnected{} 343 } 344 345 // Readiness implements tcpip.Endpoint.Readiness. 346 func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { 347 // The endpoint is always writable. 348 result := waiter.WritableEvents & mask 349 350 // Determine whether the endpoint is readable. 351 if (mask & waiter.ReadableEvents) != 0 { 352 ep.rcvMu.Lock() 353 if !ep.rcvList.Empty() || ep.rcvClosed { 354 result |= waiter.ReadableEvents 355 } 356 ep.rcvMu.Unlock() 357 } 358 359 return result 360 } 361 362 // SetSockOpt implements tcpip.Endpoint.SetSockOpt. Packet sockets cannot be 363 // used with SetSockOpt, and this function always returns 364 // *tcpip.ErrNotSupported. 365 func (ep *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error { 366 switch opt.(type) { 367 case *tcpip.SocketDetachFilterOption: 368 return nil 369 370 default: 371 return &tcpip.ErrUnknownProtocolOption{} 372 } 373 } 374 375 // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. 376 func (*endpoint) SetSockOptInt(tcpip.SockOptInt, int) tcpip.Error { 377 return &tcpip.ErrUnknownProtocolOption{} 378 } 379 380 func (ep *endpoint) LastError() tcpip.Error { 381 ep.lastErrorMu.Lock() 382 defer ep.lastErrorMu.Unlock() 383 384 err := ep.lastError 385 ep.lastError = nil 386 return err 387 } 388 389 // UpdateLastError implements tcpip.SocketOptionsHandler.UpdateLastError. 390 func (ep *endpoint) UpdateLastError(err tcpip.Error) { 391 ep.lastErrorMu.Lock() 392 ep.lastError = err 393 ep.lastErrorMu.Unlock() 394 } 395 396 // GetSockOpt implements tcpip.Endpoint.GetSockOpt. 397 func (*endpoint) GetSockOpt(tcpip.GettableSocketOption) tcpip.Error { 398 return &tcpip.ErrNotSupported{} 399 } 400 401 // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. 402 func (ep *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) { 403 switch opt { 404 case tcpip.ReceiveQueueSizeOption: 405 v := 0 406 ep.rcvMu.Lock() 407 if !ep.rcvList.Empty() { 408 p := ep.rcvList.Front() 409 v = p.data.Size() 410 } 411 ep.rcvMu.Unlock() 412 return v, nil 413 414 default: 415 return -1, &tcpip.ErrUnknownProtocolOption{} 416 } 417 } 418 419 // HandlePacket implements stack.PacketEndpoint.HandlePacket. 420 func (ep *endpoint) HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt stack.PacketBufferPtr) { 421 ep.rcvMu.Lock() 422 423 // Drop the packet if our buffer is currently full. 424 if ep.rcvClosed { 425 ep.rcvMu.Unlock() 426 ep.stack.Stats().DroppedPackets.Increment() 427 ep.stats.ReceiveErrors.ClosedReceiver.Increment() 428 return 429 } 430 431 rcvBufSize := ep.ops.GetReceiveBufferSize() 432 if ep.rcvDisabled || ep.rcvBufSize >= int(rcvBufSize) { 433 ep.rcvMu.Unlock() 434 ep.stack.Stats().DroppedPackets.Increment() 435 ep.stats.ReceiveErrors.ReceiveBufferOverflow.Increment() 436 return 437 } 438 439 wasEmpty := ep.rcvBufSize == 0 440 441 rcvdPkt := packet{ 442 packetInfo: tcpip.LinkPacketInfo{ 443 Protocol: netProto, 444 PktType: pkt.PktType, 445 }, 446 senderAddr: tcpip.FullAddress{ 447 NIC: nicID, 448 }, 449 receivedAt: ep.stack.Clock().Now(), 450 } 451 452 if len(pkt.LinkHeader().Slice()) != 0 { 453 hdr := header.Ethernet(pkt.LinkHeader().Slice()) 454 rcvdPkt.senderAddr.LinkAddr = hdr.SourceAddress() 455 } 456 457 // Raw packet endpoints include link-headers in received packets. 458 pktBuf := pkt.ToBuffer() 459 if ep.cooked { 460 // Cooked packet endpoints don't include the link-headers in received 461 // packets. 462 pktBuf.TrimFront(int64(len(pkt.LinkHeader().Slice()) + len(pkt.VirtioNetHeader().Slice()))) 463 } 464 rcvdPkt.data = stack.NewPacketBuffer(stack.PacketBufferOptions{Payload: pktBuf}) 465 466 ep.rcvList.PushBack(&rcvdPkt) 467 ep.rcvBufSize += rcvdPkt.data.Size() 468 469 ep.rcvMu.Unlock() 470 ep.stats.PacketsReceived.Increment() 471 // Notify waiters that there's data to be read. 472 if wasEmpty { 473 ep.waiterQueue.Notify(waiter.ReadableEvents) 474 } 475 } 476 477 // State implements socket.Socket.State. 478 func (*endpoint) State() uint32 { 479 return 0 480 } 481 482 // Info returns a copy of the endpoint info. 483 func (ep *endpoint) Info() tcpip.EndpointInfo { 484 ep.mu.RLock() 485 defer ep.mu.RUnlock() 486 return &stack.TransportEndpointInfo{NetProto: ep.boundNetProto} 487 } 488 489 // Stats returns a pointer to the endpoint stats. 490 func (ep *endpoint) Stats() tcpip.EndpointStats { 491 return &ep.stats 492 } 493 494 // SetOwner implements tcpip.Endpoint.SetOwner. 495 func (*endpoint) SetOwner(tcpip.PacketOwner) {} 496 497 // SocketOptions implements tcpip.Endpoint.SocketOptions. 498 func (ep *endpoint) SocketOptions() *tcpip.SocketOptions { 499 return &ep.ops 500 }