github.com/ttpreport/gvisor-ligolo@v0.0.0-20240123134145-a858404967ba/pkg/tcpip/adapters/gonet/gonet.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package gonet provides a Go net package compatible wrapper for a tcpip stack. 16 package gonet 17 18 import ( 19 "bytes" 20 "context" 21 "errors" 22 "fmt" 23 "io" 24 "net" 25 "time" 26 27 "github.com/ttpreport/gvisor-ligolo/pkg/sync" 28 "github.com/ttpreport/gvisor-ligolo/pkg/tcpip" 29 "github.com/ttpreport/gvisor-ligolo/pkg/tcpip/stack" 30 "github.com/ttpreport/gvisor-ligolo/pkg/tcpip/transport/tcp" 31 "github.com/ttpreport/gvisor-ligolo/pkg/tcpip/transport/udp" 32 "github.com/ttpreport/gvisor-ligolo/pkg/waiter" 33 ) 34 35 var ( 36 errCanceled = errors.New("operation canceled") 37 errWouldBlock = errors.New("operation would block") 38 ) 39 40 // timeoutError is how the net package reports timeouts. 41 type timeoutError struct{} 42 43 func (e *timeoutError) Error() string { return "i/o timeout" } 44 func (e *timeoutError) Timeout() bool { return true } 45 func (e *timeoutError) Temporary() bool { return true } 46 47 // A TCPListener is a wrapper around a TCP tcpip.Endpoint that implements 48 // net.Listener. 49 type TCPListener struct { 50 stack *stack.Stack 51 ep tcpip.Endpoint 52 wq *waiter.Queue 53 cancelOnce sync.Once 54 cancel chan struct{} 55 } 56 57 // NewTCPListener creates a new TCPListener from a listening tcpip.Endpoint. 58 func NewTCPListener(s *stack.Stack, wq *waiter.Queue, ep tcpip.Endpoint) *TCPListener { 59 return &TCPListener{ 60 stack: s, 61 ep: ep, 62 wq: wq, 63 cancel: make(chan struct{}), 64 } 65 } 66 67 // maxListenBacklog is set to be reasonably high for most uses of gonet. Go net 68 // package uses the value in /proc/sys/net/core/somaxconn file in Linux as the 69 // default listen backlog. The value below matches the default in common linux 70 // distros. 71 // 72 // See: https://cs.opensource.google/go/go/+/refs/tags/go1.18.1:src/net/sock_linux.go;drc=refs%2Ftags%2Fgo1.18.1;l=66 73 const maxListenBacklog = 4096 74 75 // ListenTCP creates a new TCPListener. 76 func ListenTCP(s *stack.Stack, addr tcpip.FullAddress, network tcpip.NetworkProtocolNumber) (*TCPListener, error) { 77 // Create a TCP endpoint, bind it, then start listening. 78 var wq waiter.Queue 79 ep, err := s.NewEndpoint(tcp.ProtocolNumber, network, &wq) 80 if err != nil { 81 return nil, errors.New(err.String()) 82 } 83 84 if err := ep.Bind(addr); err != nil { 85 ep.Close() 86 return nil, &net.OpError{ 87 Op: "bind", 88 Net: "tcp", 89 Addr: fullToTCPAddr(addr), 90 Err: errors.New(err.String()), 91 } 92 } 93 94 if err := ep.Listen(maxListenBacklog); err != nil { 95 ep.Close() 96 return nil, &net.OpError{ 97 Op: "listen", 98 Net: "tcp", 99 Addr: fullToTCPAddr(addr), 100 Err: errors.New(err.String()), 101 } 102 } 103 104 return NewTCPListener(s, &wq, ep), nil 105 } 106 107 // Close implements net.Listener.Close. 108 func (l *TCPListener) Close() error { 109 l.ep.Close() 110 return nil 111 } 112 113 // Shutdown stops the HTTP server. 114 func (l *TCPListener) Shutdown() { 115 l.ep.Shutdown(tcpip.ShutdownWrite | tcpip.ShutdownRead) 116 l.cancelOnce.Do(func() { 117 close(l.cancel) // broadcast cancellation 118 }) 119 } 120 121 // Addr implements net.Listener.Addr. 122 func (l *TCPListener) Addr() net.Addr { 123 a, err := l.ep.GetLocalAddress() 124 if err != nil { 125 return nil 126 } 127 return fullToTCPAddr(a) 128 } 129 130 type deadlineTimer struct { 131 // mu protects the fields below. 132 mu sync.Mutex 133 134 readTimer *time.Timer 135 readCancelCh chan struct{} 136 writeTimer *time.Timer 137 writeCancelCh chan struct{} 138 } 139 140 func (d *deadlineTimer) init() { 141 d.readCancelCh = make(chan struct{}) 142 d.writeCancelCh = make(chan struct{}) 143 } 144 145 func (d *deadlineTimer) readCancel() <-chan struct{} { 146 d.mu.Lock() 147 c := d.readCancelCh 148 d.mu.Unlock() 149 return c 150 } 151 func (d *deadlineTimer) writeCancel() <-chan struct{} { 152 d.mu.Lock() 153 c := d.writeCancelCh 154 d.mu.Unlock() 155 return c 156 } 157 158 // setDeadline contains the shared logic for setting a deadline. 159 // 160 // cancelCh and timer must be pointers to deadlineTimer.readCancelCh and 161 // deadlineTimer.readTimer or deadlineTimer.writeCancelCh and 162 // deadlineTimer.writeTimer. 163 // 164 // setDeadline must only be called while holding d.mu. 165 func (d *deadlineTimer) setDeadline(cancelCh *chan struct{}, timer **time.Timer, t time.Time) { 166 if *timer != nil && !(*timer).Stop() { 167 *cancelCh = make(chan struct{}) 168 } 169 170 // Create a new channel if we already closed it due to setting an already 171 // expired time. We won't race with the timer because we already handled 172 // that above. 173 select { 174 case <-*cancelCh: 175 *cancelCh = make(chan struct{}) 176 default: 177 } 178 179 // "A zero value for t means I/O operations will not time out." 180 // - net.Conn.SetDeadline 181 if t.IsZero() { 182 return 183 } 184 185 timeout := t.Sub(time.Now()) 186 if timeout <= 0 { 187 close(*cancelCh) 188 return 189 } 190 191 // Timer.Stop returns whether or not the AfterFunc has started, but 192 // does not indicate whether or not it has completed. Make a copy of 193 // the cancel channel to prevent this code from racing with the next 194 // call of setDeadline replacing *cancelCh. 195 ch := *cancelCh 196 *timer = time.AfterFunc(timeout, func() { 197 close(ch) 198 }) 199 } 200 201 // SetReadDeadline implements net.Conn.SetReadDeadline and 202 // net.PacketConn.SetReadDeadline. 203 func (d *deadlineTimer) SetReadDeadline(t time.Time) error { 204 d.mu.Lock() 205 d.setDeadline(&d.readCancelCh, &d.readTimer, t) 206 d.mu.Unlock() 207 return nil 208 } 209 210 // SetWriteDeadline implements net.Conn.SetWriteDeadline and 211 // net.PacketConn.SetWriteDeadline. 212 func (d *deadlineTimer) SetWriteDeadline(t time.Time) error { 213 d.mu.Lock() 214 d.setDeadline(&d.writeCancelCh, &d.writeTimer, t) 215 d.mu.Unlock() 216 return nil 217 } 218 219 // SetDeadline implements net.Conn.SetDeadline and net.PacketConn.SetDeadline. 220 func (d *deadlineTimer) SetDeadline(t time.Time) error { 221 d.mu.Lock() 222 d.setDeadline(&d.readCancelCh, &d.readTimer, t) 223 d.setDeadline(&d.writeCancelCh, &d.writeTimer, t) 224 d.mu.Unlock() 225 return nil 226 } 227 228 // A TCPConn is a wrapper around a TCP tcpip.Endpoint that implements the net.Conn 229 // interface. 230 type TCPConn struct { 231 deadlineTimer 232 233 wq *waiter.Queue 234 ep tcpip.Endpoint 235 236 // readMu serializes reads and implicitly protects read. 237 // 238 // Lock ordering: 239 // If both readMu and deadlineTimer.mu are to be used in a single 240 // request, readMu must be acquired before deadlineTimer.mu. 241 readMu sync.Mutex 242 243 // read contains bytes that have been read from the endpoint, 244 // but haven't yet been returned. 245 read []byte 246 } 247 248 // NewTCPConn creates a new TCPConn. 249 func NewTCPConn(wq *waiter.Queue, ep tcpip.Endpoint) *TCPConn { 250 c := &TCPConn{ 251 wq: wq, 252 ep: ep, 253 } 254 c.deadlineTimer.init() 255 return c 256 } 257 258 // Accept implements net.Conn.Accept. 259 func (l *TCPListener) Accept() (net.Conn, error) { 260 n, wq, err := l.ep.Accept(nil) 261 262 if _, ok := err.(*tcpip.ErrWouldBlock); ok { 263 // Create wait queue entry that notifies a channel. 264 waitEntry, notifyCh := waiter.NewChannelEntry(waiter.ReadableEvents) 265 l.wq.EventRegister(&waitEntry) 266 defer l.wq.EventUnregister(&waitEntry) 267 268 for { 269 n, wq, err = l.ep.Accept(nil) 270 271 if _, ok := err.(*tcpip.ErrWouldBlock); !ok { 272 break 273 } 274 275 select { 276 case <-l.cancel: 277 return nil, errCanceled 278 case <-notifyCh: 279 } 280 } 281 } 282 283 if err != nil { 284 return nil, &net.OpError{ 285 Op: "accept", 286 Net: "tcp", 287 Addr: l.Addr(), 288 Err: errors.New(err.String()), 289 } 290 } 291 292 return NewTCPConn(wq, n), nil 293 } 294 295 type opErrorer interface { 296 newOpError(op string, err error) *net.OpError 297 } 298 299 // commonRead implements the common logic between net.Conn.Read and 300 // net.PacketConn.ReadFrom. 301 func commonRead(b []byte, ep tcpip.Endpoint, wq *waiter.Queue, deadline <-chan struct{}, addr *tcpip.FullAddress, errorer opErrorer) (int, error) { 302 select { 303 case <-deadline: 304 return 0, errorer.newOpError("read", &timeoutError{}) 305 default: 306 } 307 308 w := tcpip.SliceWriter(b) 309 opts := tcpip.ReadOptions{NeedRemoteAddr: addr != nil} 310 res, err := ep.Read(&w, opts) 311 312 if _, ok := err.(*tcpip.ErrWouldBlock); ok { 313 // Create wait queue entry that notifies a channel. 314 waitEntry, notifyCh := waiter.NewChannelEntry(waiter.ReadableEvents) 315 wq.EventRegister(&waitEntry) 316 defer wq.EventUnregister(&waitEntry) 317 for { 318 res, err = ep.Read(&w, opts) 319 if _, ok := err.(*tcpip.ErrWouldBlock); !ok { 320 break 321 } 322 select { 323 case <-deadline: 324 return 0, errorer.newOpError("read", &timeoutError{}) 325 case <-notifyCh: 326 } 327 } 328 } 329 330 if _, ok := err.(*tcpip.ErrClosedForReceive); ok { 331 return 0, io.EOF 332 } 333 334 if err != nil { 335 return 0, errorer.newOpError("read", errors.New(err.String())) 336 } 337 338 if addr != nil { 339 *addr = res.RemoteAddr 340 } 341 return res.Count, nil 342 } 343 344 // Read implements net.Conn.Read. 345 func (c *TCPConn) Read(b []byte) (int, error) { 346 c.readMu.Lock() 347 defer c.readMu.Unlock() 348 349 deadline := c.readCancel() 350 351 n, err := commonRead(b, c.ep, c.wq, deadline, nil, c) 352 if n != 0 { 353 c.ep.ModerateRecvBuf(n) 354 } 355 return n, err 356 } 357 358 // Write implements net.Conn.Write. 359 func (c *TCPConn) Write(b []byte) (int, error) { 360 deadline := c.writeCancel() 361 362 // Check if deadlineTimer has already expired. 363 select { 364 case <-deadline: 365 return 0, c.newOpError("write", &timeoutError{}) 366 default: 367 } 368 369 // We must handle two soft failure conditions simultaneously: 370 // 1. Write may write nothing and return *tcpip.ErrWouldBlock. 371 // If this happens, we need to register for notifications if we have 372 // not already and wait to try again. 373 // 2. Write may write fewer than the full number of bytes and return 374 // without error. In this case we need to try writing the remaining 375 // bytes again. I do not need to register for notifications. 376 // 377 // What is more, these two soft failure conditions can be interspersed. 378 // There is no guarantee that all of the condition #1s will occur before 379 // all of the condition #2s or visa-versa. 380 var ( 381 r bytes.Reader 382 nbytes int 383 entry waiter.Entry 384 ch <-chan struct{} 385 ) 386 for nbytes != len(b) { 387 r.Reset(b[nbytes:]) 388 n, err := c.ep.Write(&r, tcpip.WriteOptions{}) 389 nbytes += int(n) 390 switch err.(type) { 391 case nil: 392 case *tcpip.ErrWouldBlock: 393 if ch == nil { 394 entry, ch = waiter.NewChannelEntry(waiter.WritableEvents) 395 c.wq.EventRegister(&entry) 396 defer c.wq.EventUnregister(&entry) 397 } else { 398 // Don't wait immediately after registration in case more data 399 // became available between when we last checked and when we setup 400 // the notification. 401 select { 402 case <-deadline: 403 return nbytes, c.newOpError("write", &timeoutError{}) 404 case <-ch: 405 continue 406 } 407 } 408 default: 409 return nbytes, c.newOpError("write", errors.New(err.String())) 410 } 411 } 412 return nbytes, nil 413 } 414 415 // Close implements net.Conn.Close. 416 func (c *TCPConn) Close() error { 417 c.ep.Close() 418 return nil 419 } 420 421 // CloseRead shuts down the reading side of the TCP connection. Most callers 422 // should just use Close. 423 // 424 // A TCP Half-Close is performed the same as CloseRead for *net.TCPConn. 425 func (c *TCPConn) CloseRead() error { 426 if terr := c.ep.Shutdown(tcpip.ShutdownRead); terr != nil { 427 return c.newOpError("close", errors.New(terr.String())) 428 } 429 return nil 430 } 431 432 // CloseWrite shuts down the writing side of the TCP connection. Most callers 433 // should just use Close. 434 // 435 // A TCP Half-Close is performed the same as CloseWrite for *net.TCPConn. 436 func (c *TCPConn) CloseWrite() error { 437 if terr := c.ep.Shutdown(tcpip.ShutdownWrite); terr != nil { 438 return c.newOpError("close", errors.New(terr.String())) 439 } 440 return nil 441 } 442 443 // LocalAddr implements net.Conn.LocalAddr. 444 func (c *TCPConn) LocalAddr() net.Addr { 445 a, err := c.ep.GetLocalAddress() 446 if err != nil { 447 return nil 448 } 449 return fullToTCPAddr(a) 450 } 451 452 // RemoteAddr implements net.Conn.RemoteAddr. 453 func (c *TCPConn) RemoteAddr() net.Addr { 454 a, err := c.ep.GetRemoteAddress() 455 if err != nil { 456 return nil 457 } 458 return fullToTCPAddr(a) 459 } 460 461 func (c *TCPConn) newOpError(op string, err error) *net.OpError { 462 return &net.OpError{ 463 Op: op, 464 Net: "tcp", 465 Source: c.LocalAddr(), 466 Addr: c.RemoteAddr(), 467 Err: err, 468 } 469 } 470 471 func fullToTCPAddr(addr tcpip.FullAddress) *net.TCPAddr { 472 return &net.TCPAddr{IP: net.IP(addr.Addr.AsSlice()), Port: int(addr.Port)} 473 } 474 475 func fullToUDPAddr(addr tcpip.FullAddress) *net.UDPAddr { 476 return &net.UDPAddr{IP: net.IP(addr.Addr.AsSlice()), Port: int(addr.Port)} 477 } 478 479 // DialTCP creates a new TCPConn connected to the specified address. 480 func DialTCP(s *stack.Stack, addr tcpip.FullAddress, network tcpip.NetworkProtocolNumber) (*TCPConn, error) { 481 return DialContextTCP(context.Background(), s, addr, network) 482 } 483 484 // DialTCPWithBind creates a new TCPConn connected to the specified 485 // remoteAddress with its local address bound to localAddr. 486 func DialTCPWithBind(ctx context.Context, s *stack.Stack, localAddr, remoteAddr tcpip.FullAddress, network tcpip.NetworkProtocolNumber) (*TCPConn, error) { 487 // Create TCP endpoint, then connect. 488 var wq waiter.Queue 489 ep, err := s.NewEndpoint(tcp.ProtocolNumber, network, &wq) 490 if err != nil { 491 return nil, errors.New(err.String()) 492 } 493 494 // Create wait queue entry that notifies a channel. 495 // 496 // We do this unconditionally as Connect will always return an error. 497 waitEntry, notifyCh := waiter.NewChannelEntry(waiter.WritableEvents) 498 wq.EventRegister(&waitEntry) 499 defer wq.EventUnregister(&waitEntry) 500 501 select { 502 case <-ctx.Done(): 503 return nil, ctx.Err() 504 default: 505 } 506 507 // Bind before connect if requested. 508 if localAddr != (tcpip.FullAddress{}) { 509 if err = ep.Bind(localAddr); err != nil { 510 return nil, fmt.Errorf("ep.Bind(%+v) = %s", localAddr, err) 511 } 512 } 513 514 err = ep.Connect(remoteAddr) 515 if _, ok := err.(*tcpip.ErrConnectStarted); ok { 516 select { 517 case <-ctx.Done(): 518 ep.Close() 519 return nil, ctx.Err() 520 case <-notifyCh: 521 } 522 523 err = ep.LastError() 524 } 525 if err != nil { 526 ep.Close() 527 return nil, &net.OpError{ 528 Op: "connect", 529 Net: "tcp", 530 Addr: fullToTCPAddr(remoteAddr), 531 Err: errors.New(err.String()), 532 } 533 } 534 535 return NewTCPConn(&wq, ep), nil 536 } 537 538 // DialContextTCP creates a new TCPConn connected to the specified address 539 // with the option of adding cancellation and timeouts. 540 func DialContextTCP(ctx context.Context, s *stack.Stack, addr tcpip.FullAddress, network tcpip.NetworkProtocolNumber) (*TCPConn, error) { 541 return DialTCPWithBind(ctx, s, tcpip.FullAddress{} /* localAddr */, addr /* remoteAddr */, network) 542 } 543 544 // A UDPConn is a wrapper around a UDP tcpip.Endpoint that implements 545 // net.Conn and net.PacketConn. 546 type UDPConn struct { 547 deadlineTimer 548 549 stack *stack.Stack 550 ep tcpip.Endpoint 551 wq *waiter.Queue 552 } 553 554 // NewUDPConn creates a new UDPConn. 555 func NewUDPConn(s *stack.Stack, wq *waiter.Queue, ep tcpip.Endpoint) *UDPConn { 556 c := &UDPConn{ 557 stack: s, 558 ep: ep, 559 wq: wq, 560 } 561 c.deadlineTimer.init() 562 return c 563 } 564 565 // DialUDP creates a new UDPConn. 566 // 567 // If laddr is nil, a local address is automatically chosen. 568 // 569 // If raddr is nil, the UDPConn is left unconnected. 570 func DialUDP(s *stack.Stack, laddr, raddr *tcpip.FullAddress, network tcpip.NetworkProtocolNumber) (*UDPConn, error) { 571 var wq waiter.Queue 572 ep, err := s.NewEndpoint(udp.ProtocolNumber, network, &wq) 573 if err != nil { 574 return nil, errors.New(err.String()) 575 } 576 577 if laddr != nil { 578 if err := ep.Bind(*laddr); err != nil { 579 ep.Close() 580 return nil, &net.OpError{ 581 Op: "bind", 582 Net: "udp", 583 Addr: fullToUDPAddr(*laddr), 584 Err: errors.New(err.String()), 585 } 586 } 587 } 588 589 c := NewUDPConn(s, &wq, ep) 590 591 if raddr != nil { 592 if err := c.ep.Connect(*raddr); err != nil { 593 c.ep.Close() 594 return nil, &net.OpError{ 595 Op: "connect", 596 Net: "udp", 597 Addr: fullToUDPAddr(*raddr), 598 Err: errors.New(err.String()), 599 } 600 } 601 } 602 603 return c, nil 604 } 605 606 func (c *UDPConn) newOpError(op string, err error) *net.OpError { 607 return c.newRemoteOpError(op, nil, err) 608 } 609 610 func (c *UDPConn) newRemoteOpError(op string, remote net.Addr, err error) *net.OpError { 611 return &net.OpError{ 612 Op: op, 613 Net: "udp", 614 Source: c.LocalAddr(), 615 Addr: remote, 616 Err: err, 617 } 618 } 619 620 // RemoteAddr implements net.Conn.RemoteAddr. 621 func (c *UDPConn) RemoteAddr() net.Addr { 622 a, err := c.ep.GetRemoteAddress() 623 if err != nil { 624 return nil 625 } 626 return fullToUDPAddr(a) 627 } 628 629 // Read implements net.Conn.Read 630 func (c *UDPConn) Read(b []byte) (int, error) { 631 bytesRead, _, err := c.ReadFrom(b) 632 return bytesRead, err 633 } 634 635 // ReadFrom implements net.PacketConn.ReadFrom. 636 func (c *UDPConn) ReadFrom(b []byte) (int, net.Addr, error) { 637 deadline := c.readCancel() 638 639 var addr tcpip.FullAddress 640 n, err := commonRead(b, c.ep, c.wq, deadline, &addr, c) 641 if err != nil { 642 return 0, nil, err 643 } 644 return n, fullToUDPAddr(addr), nil 645 } 646 647 func (c *UDPConn) Write(b []byte) (int, error) { 648 return c.WriteTo(b, nil) 649 } 650 651 // WriteTo implements net.PacketConn.WriteTo. 652 func (c *UDPConn) WriteTo(b []byte, addr net.Addr) (int, error) { 653 deadline := c.writeCancel() 654 655 // Check if deadline has already expired. 656 select { 657 case <-deadline: 658 return 0, c.newRemoteOpError("write", addr, &timeoutError{}) 659 default: 660 } 661 662 // If we're being called by Write, there is no addr 663 writeOptions := tcpip.WriteOptions{} 664 if addr != nil { 665 ua := addr.(*net.UDPAddr) 666 writeOptions.To = &tcpip.FullAddress{ 667 Addr: tcpip.AddrFromSlice(ua.IP), 668 Port: uint16(ua.Port), 669 } 670 } 671 672 var r bytes.Reader 673 r.Reset(b) 674 n, err := c.ep.Write(&r, writeOptions) 675 if _, ok := err.(*tcpip.ErrWouldBlock); ok { 676 // Create wait queue entry that notifies a channel. 677 waitEntry, notifyCh := waiter.NewChannelEntry(waiter.WritableEvents) 678 c.wq.EventRegister(&waitEntry) 679 defer c.wq.EventUnregister(&waitEntry) 680 for { 681 select { 682 case <-deadline: 683 return int(n), c.newRemoteOpError("write", addr, &timeoutError{}) 684 case <-notifyCh: 685 } 686 687 n, err = c.ep.Write(&r, writeOptions) 688 if _, ok := err.(*tcpip.ErrWouldBlock); !ok { 689 break 690 } 691 } 692 } 693 694 if err == nil { 695 return int(n), nil 696 } 697 698 return int(n), c.newRemoteOpError("write", addr, errors.New(err.String())) 699 } 700 701 // Close implements net.PacketConn.Close. 702 func (c *UDPConn) Close() error { 703 c.ep.Close() 704 return nil 705 } 706 707 // LocalAddr implements net.PacketConn.LocalAddr. 708 func (c *UDPConn) LocalAddr() net.Addr { 709 a, err := c.ep.GetLocalAddress() 710 if err != nil { 711 return nil 712 } 713 return fullToUDPAddr(a) 714 }