github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/unet/unet.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package unet provides a minimal net package based on Unix Domain Sockets. 16 // 17 // This does no pooling, and should only be used for a limited number of 18 // connections in a Go process. Don't use this package for arbitrary servers. 19 package unet 20 21 import ( 22 "errors" 23 24 "golang.org/x/sys/unix" 25 "github.com/metacubex/gvisor/pkg/atomicbitops" 26 "github.com/metacubex/gvisor/pkg/eventfd" 27 "github.com/metacubex/gvisor/pkg/sync" 28 ) 29 30 // backlog is used for the listen request. 31 const backlog = 16 32 33 // errClosing is returned by wait if the Socket is in the process of closing. 34 var errClosing = errors.New("Socket is closing") 35 36 // errMessageTruncated indicates that data was lost because the provided buffer 37 // was too small. 38 var errMessageTruncated = errors.New("message truncated") 39 40 // socketType returns the appropriate type. 41 func socketType(packet bool) int { 42 if packet { 43 return unix.SOCK_SEQPACKET 44 } 45 return unix.SOCK_STREAM 46 } 47 48 // socket creates a new host socket. 49 func socket(packet bool) (int, error) { 50 // Make a new socket. 51 fd, err := unix.Socket(unix.AF_UNIX, socketType(packet), 0) 52 if err != nil { 53 return 0, err 54 } 55 56 return fd, nil 57 } 58 59 // Socket is a connected unix domain socket. 60 type Socket struct { 61 // gate protects use of fd. 62 gate sync.Gate 63 64 // fd is the bound socket. 65 // 66 // fd only remains valid if read while within gate. 67 fd atomicbitops.Int32 68 69 // efd is an event FD that is signaled when the socket is closing. 70 // 71 // efd is immutable and remains valid until Close/Release. 72 efd eventfd.Eventfd 73 74 // race is an atomic variable used to avoid triggering the race 75 // detector. See comment in SocketPair below. 76 race *atomicbitops.Int32 77 } 78 79 // NewSocket returns a socket from an existing FD. 80 // 81 // NewSocket takes ownership of fd. 82 func NewSocket(fd int) (*Socket, error) { 83 // fd must be non-blocking for non-blocking unix.Accept in 84 // ServerSocket.Accept. 85 if err := unix.SetNonblock(fd, true); err != nil { 86 return nil, err 87 } 88 89 efd, err := eventfd.Create() 90 if err != nil { 91 return nil, err 92 } 93 94 return &Socket{ 95 fd: atomicbitops.FromInt32(int32(fd)), 96 efd: efd, 97 }, nil 98 } 99 100 // finish completes use of s.fd by evicting any waiters, closing the gate, and 101 // closing the event FD. 102 func (s *Socket) finish() error { 103 // Signal any blocked or future polls. 104 if err := s.efd.Notify(); err != nil { 105 return err 106 } 107 108 // Close the gate, blocking until all FD users leave. 109 s.gate.Close() 110 111 return s.efd.Close() 112 } 113 114 // Close closes the socket. 115 func (s *Socket) Close() error { 116 // Set the FD in the socket to -1, to ensure that all future calls to 117 // FD/Release get nothing and Close calls return immediately. 118 fd := int(s.fd.Swap(-1)) 119 if fd < 0 { 120 // Already closed or closing. 121 return unix.EBADF 122 } 123 124 // Shutdown the socket to cancel any pending accepts. 125 s.shutdown(fd) 126 127 if err := s.finish(); err != nil { 128 return err 129 } 130 131 return unix.Close(fd) 132 } 133 134 // Release releases ownership of the socket FD. 135 // 136 // The returned FD is non-blocking. 137 // 138 // Any concurrent or future callers of Socket methods will receive EBADF. 139 func (s *Socket) Release() (int, error) { 140 // Set the FD in the socket to -1, to ensure that all future calls to 141 // FD/Release get nothing and Close calls return immediately. 142 fd := int(s.fd.Swap(-1)) 143 if fd < 0 { 144 // Already closed or closing. 145 return -1, unix.EBADF 146 } 147 148 if err := s.finish(); err != nil { 149 return -1, err 150 } 151 152 return fd, nil 153 } 154 155 // FD returns the FD for this Socket. 156 // 157 // The FD is non-blocking and must not be made blocking. 158 // 159 // N.B. os.File.Fd makes the FD blocking. Use of Release instead of FD is 160 // strongly preferred. 161 // 162 // The returned FD cannot be used safely if there may be concurrent callers to 163 // Close or Release. 164 // 165 // Use Release to take ownership of the FD. 166 func (s *Socket) FD() int { 167 return int(s.fd.Load()) 168 } 169 170 // enterFD enters the FD gate and returns the FD value. 171 // 172 // If enterFD returns ok, s.gate.Leave must be called when done with the FD. 173 // Callers may only block while within the gate using s.wait. 174 // 175 // The returned FD is guaranteed to remain valid until s.gate.Leave. 176 func (s *Socket) enterFD() (int, bool) { 177 if !s.gate.Enter() { 178 return -1, false 179 } 180 181 fd := int(s.fd.Load()) 182 if fd < 0 { 183 s.gate.Leave() 184 return -1, false 185 } 186 187 return fd, true 188 } 189 190 // SocketPair creates a pair of connected sockets. 191 func SocketPair(packet bool) (*Socket, *Socket, error) { 192 // Make a new pair. 193 fds, err := unix.Socketpair(unix.AF_UNIX, socketType(packet)|unix.SOCK_CLOEXEC, 0) 194 if err != nil { 195 return nil, nil, err 196 } 197 198 // race is an atomic variable used to avoid triggering the race 199 // detector. We have to fool TSAN into thinking there is a race 200 // variable between our two sockets. We only use SocketPair in tests 201 // anyway. 202 // 203 // NOTE(b/27107811): This is purely due to the fact that the raw 204 // syscall does not serve as a boundary for the sanitizer. 205 a, err := NewSocket(fds[0]) 206 if err != nil { 207 unix.Close(fds[0]) 208 unix.Close(fds[1]) 209 return nil, nil, err 210 } 211 var race atomicbitops.Int32 212 a.race = &race 213 b, err := NewSocket(fds[1]) 214 if err != nil { 215 a.Close() 216 unix.Close(fds[1]) 217 return nil, nil, err 218 } 219 b.race = &race 220 return a, b, nil 221 } 222 223 // Connect connects to a server. 224 func Connect(addr string, packet bool) (*Socket, error) { 225 fd, err := socket(packet) 226 if err != nil { 227 return nil, err 228 } 229 230 // Connect the socket. 231 usa := &unix.SockaddrUnix{Name: addr} 232 if err := unix.Connect(fd, usa); err != nil { 233 unix.Close(fd) 234 return nil, err 235 } 236 237 return NewSocket(fd) 238 } 239 240 // ControlMessage wraps around a byte array and provides functions for parsing 241 // as a Unix Domain Socket control message. 242 type ControlMessage []byte 243 244 // EnableFDs enables receiving FDs via control message. 245 // 246 // This guarantees only a MINIMUM number of FDs received. You may receive MORE 247 // than this due to the way FDs are packed. To be specific, the number of 248 // receivable buffers will be rounded up to the nearest even number. 249 // 250 // This must be called prior to ReadVec if you want to receive FDs. 251 func (c *ControlMessage) EnableFDs(count int) { 252 *c = make([]byte, unix.CmsgSpace(count*4)) 253 } 254 255 // ExtractFDs returns the list of FDs in the control message. 256 // 257 // Either this or CloseFDs should be used after EnableFDs. 258 func (c *ControlMessage) ExtractFDs() ([]int, error) { 259 msgs, err := unix.ParseSocketControlMessage(*c) 260 if err != nil { 261 return nil, err 262 } 263 var fds []int 264 for _, msg := range msgs { 265 thisFds, err := unix.ParseUnixRights(&msg) 266 if err != nil { 267 // Different control message. 268 return nil, err 269 } 270 for _, fd := range thisFds { 271 if fd >= 0 { 272 fds = append(fds, fd) 273 } 274 } 275 } 276 return fds, nil 277 } 278 279 // CloseFDs closes the list of FDs in the control message. 280 // 281 // Either this or ExtractFDs should be used after EnableFDs. 282 func (c *ControlMessage) CloseFDs() { 283 fds, _ := c.ExtractFDs() 284 for _, fd := range fds { 285 if fd >= 0 { 286 unix.Close(fd) 287 } 288 } 289 } 290 291 // PackFDs packs the given list of FDs in the control message. 292 // 293 // This must be used prior to WriteVec. 294 func (c *ControlMessage) PackFDs(fds ...int) { 295 *c = ControlMessage(unix.UnixRights(fds...)) 296 } 297 298 // UnpackFDs clears the control message. 299 func (c *ControlMessage) UnpackFDs() { 300 *c = nil 301 } 302 303 // SocketWriter wraps an individual send operation. 304 // 305 // The normal entrypoint is WriteVec. 306 type SocketWriter struct { 307 socket *Socket 308 to []byte 309 blocking bool 310 race *atomicbitops.Int32 311 312 ControlMessage 313 } 314 315 // Writer returns a writer for this socket. 316 func (s *Socket) Writer(blocking bool) SocketWriter { 317 return SocketWriter{socket: s, blocking: blocking, race: s.race} 318 } 319 320 // Write implements io.Writer.Write. 321 func (s *Socket) Write(p []byte) (int, error) { 322 r := s.Writer(true) 323 return r.WriteVec([][]byte{p}) 324 } 325 326 // GetSockOpt gets the given socket option. 327 func (s *Socket) GetSockOpt(level int, name int, b []byte) (uint32, error) { 328 fd, ok := s.enterFD() 329 if !ok { 330 return 0, unix.EBADF 331 } 332 defer s.gate.Leave() 333 334 return getsockopt(fd, level, name, b) 335 } 336 337 // SetSockOpt sets the given socket option. 338 func (s *Socket) SetSockOpt(level, name int, b []byte) error { 339 fd, ok := s.enterFD() 340 if !ok { 341 return unix.EBADF 342 } 343 defer s.gate.Leave() 344 345 return setsockopt(fd, level, name, b) 346 } 347 348 // GetSockName returns the socket name. 349 func (s *Socket) GetSockName() ([]byte, error) { 350 fd, ok := s.enterFD() 351 if !ok { 352 return nil, unix.EBADF 353 } 354 defer s.gate.Leave() 355 356 var buf []byte 357 l := unix.SizeofSockaddrAny 358 359 for { 360 // If the buffer is not large enough, allocate a new one with the hint. 361 buf = make([]byte, l) 362 l, err := getsockname(fd, buf) 363 if err != nil { 364 return nil, err 365 } 366 367 if l <= uint32(len(buf)) { 368 return buf[:l], nil 369 } 370 } 371 } 372 373 // GetPeerName returns the peer name. 374 func (s *Socket) GetPeerName() ([]byte, error) { 375 fd, ok := s.enterFD() 376 if !ok { 377 return nil, unix.EBADF 378 } 379 defer s.gate.Leave() 380 381 var buf []byte 382 l := unix.SizeofSockaddrAny 383 384 for { 385 // See above. 386 buf = make([]byte, l) 387 l, err := getpeername(fd, buf) 388 if err != nil { 389 return nil, err 390 } 391 392 if l <= uint32(len(buf)) { 393 return buf[:l], nil 394 } 395 } 396 } 397 398 // SocketReader wraps an individual receive operation. 399 // 400 // This may be used for doing vectorized reads and/or sending additional 401 // control messages (e.g. FDs). The normal entrypoint is ReadVec. 402 // 403 // One of ExtractFDs or DisposeFDs must be called if EnableFDs is used. 404 type SocketReader struct { 405 socket *Socket 406 source []byte 407 blocking bool 408 race *atomicbitops.Int32 409 410 ControlMessage 411 } 412 413 // Reader returns a reader for this socket. 414 func (s *Socket) Reader(blocking bool) SocketReader { 415 return SocketReader{socket: s, blocking: blocking, race: s.race} 416 } 417 418 // Read implements io.Reader.Read. 419 func (s *Socket) Read(p []byte) (int, error) { 420 r := s.Reader(true) 421 return r.ReadVec([][]byte{p}) 422 } 423 424 func (s *Socket) shutdown(fd int) error { 425 // Shutdown the socket to cancel any pending accepts. 426 return unix.Shutdown(fd, unix.SHUT_RDWR) 427 } 428 429 // Shutdown closes the socket for read and write. 430 func (s *Socket) Shutdown() error { 431 fd, ok := s.enterFD() 432 if !ok { 433 return unix.EBADF 434 } 435 defer s.gate.Leave() 436 437 return s.shutdown(fd) 438 } 439 440 // ServerSocket is a bound unix domain socket. 441 type ServerSocket struct { 442 socket *Socket 443 } 444 445 // NewServerSocket returns a socket from an existing FD. 446 func NewServerSocket(fd int) (*ServerSocket, error) { 447 s, err := NewSocket(fd) 448 if err != nil { 449 return nil, err 450 } 451 return &ServerSocket{socket: s}, nil 452 } 453 454 // Bind creates and binds a new socket. 455 func Bind(addr string, packet bool) (*ServerSocket, error) { 456 fd, err := socket(packet) 457 if err != nil { 458 return nil, err 459 } 460 461 // Do the bind. 462 usa := &unix.SockaddrUnix{Name: addr} 463 if err := unix.Bind(fd, usa); err != nil { 464 unix.Close(fd) 465 return nil, err 466 } 467 468 return NewServerSocket(fd) 469 } 470 471 // BindAndListen creates, binds and listens on a new socket. 472 func BindAndListen(addr string, packet bool) (*ServerSocket, error) { 473 s, err := Bind(addr, packet) 474 if err != nil { 475 return nil, err 476 } 477 478 // Start listening. 479 if err := s.Listen(); err != nil { 480 s.Close() 481 return nil, err 482 } 483 484 return s, nil 485 } 486 487 // Listen starts listening on the socket. 488 func (s *ServerSocket) Listen() error { 489 fd, ok := s.socket.enterFD() 490 if !ok { 491 return unix.EBADF 492 } 493 defer s.socket.gate.Leave() 494 495 return unix.Listen(fd, backlog) 496 } 497 498 // Accept accepts a new connection. 499 // 500 // This is always blocking. 501 // 502 // Preconditions: 503 // - ServerSocket is listening (Listen called). 504 func (s *ServerSocket) Accept() (*Socket, error) { 505 fd, ok := s.socket.enterFD() 506 if !ok { 507 return nil, unix.EBADF 508 } 509 defer s.socket.gate.Leave() 510 511 for { 512 nfd, _, err := unix.Accept(fd) 513 switch err { 514 case nil: 515 return NewSocket(nfd) 516 case unix.EAGAIN: 517 err = s.socket.wait(false) 518 if err == errClosing { 519 err = unix.EBADF 520 } 521 } 522 if err != nil { 523 return nil, err 524 } 525 } 526 } 527 528 // Close closes the server socket. 529 // 530 // This must only be called once. 531 func (s *ServerSocket) Close() error { 532 return s.socket.Close() 533 } 534 535 // FD returns the socket's file descriptor. 536 // 537 // See Socket.FD. 538 func (s *ServerSocket) FD() int { 539 return s.socket.FD() 540 } 541 542 // Release releases ownership of the socket's file descriptor. 543 // 544 // See Socket.Release. 545 func (s *ServerSocket) Release() (int, error) { 546 return s.socket.Release() 547 }