// Copyright (c) 2020 Meng Huang (mhboy@outlook.com)
// This package is licensed under a MIT license that can be found in the LICENSE file.

//go:build linux || darwin || dragonfly || freebsd || netbsd || openbsd
// +build linux darwin dragonfly freebsd netbsd openbsd

package netpoll

import (
	"errors"
	"github.com/hslam/buffer"
	"github.com/hslam/scheduler"
	"github.com/hslam/sendfile"
	"github.com/hslam/splice"
	"io"
	"net"
	"os"
	"runtime"
	"sync"
	"sync/atomic"
	"syscall"
	"time"
)

const (
	// idleTime is how long a worker with no connections keeps polling
	// before it puts itself to sleep (see worker.run).
	idleTime = time.Second
)

var (
	numCPU = runtime.NumCPU()
)

// Server defines parameters for running a server.
type Server struct {
	Network string
	Address string
	// Handler responds to a single request.
	Handler Handler
	// NoAsync disables async.
	NoAsync bool
	UnsharedWorkers int
	SharedWorkers   int
	addr            net.Addr
	// netServer is the fallback implementation used when the listener is
	// neither *net.TCPListener nor *net.UnixListener (see Serve).
	netServer *netServer
	file      *os.File
	fd        int
	poll      *Poll
	// workers holds unshared workers first (indices [0, unsharedWorkers)),
	// then shared workers; heap holds only the shared workers.
	workers []*worker
	heap    []*worker
	// rescheduled enables the hot-connection rebalancing goroutine.
	rescheduled bool
	lock        sync.Mutex
	// wake reports whether the reschedule ticker goroutine is running;
	// guarded by lock.
	wake         bool
	rescheduling int32
	// list and adjust are scratch slices reused by reschedule.
	list   list
	adjust list
	unsharedWorkers uint
	sharedWorkers   uint
	wg              sync.WaitGroup
	closed          int32
	done            chan struct{}
}

// ListenAndServe listens on the network address and then calls
// Serve with handler to handle requests on incoming connections.
//
// ListenAndServe always returns a non-nil error.
// After Close the returned error is ErrServerClosed.
func (s *Server) ListenAndServe() error {
	if atomic.LoadInt32(&s.closed) != 0 {
		return ErrServerClosed
	}
	ln, err := net.Listen(s.Network, s.Address)
	if err != nil {
		return err
	}
	return s.Serve(ln)
}

// Serve accepts incoming connections on the listener l,
// and registers the conn fd to poll. The poll will trigger the fd to
// read requests and then call handler to reply to them.
//
// The handler must be not nil.
//
// Serve always returns a non-nil error.
// After Close the returned error is ErrServerClosed.
func (s *Server) Serve(l net.Listener) (err error) {
	if atomic.LoadInt32(&s.closed) != 0 {
		return ErrServerClosed
	}
	// NOTE(review): a negative UnsharedWorkers silently leaves
	// s.unsharedWorkers at 0 rather than panicking like SharedWorkers.
	if s.UnsharedWorkers == 0 {
		s.unsharedWorkers = 16
	} else if s.UnsharedWorkers > 0 {
		s.unsharedWorkers = uint(s.UnsharedWorkers)
	}
	if s.SharedWorkers == 0 {
		s.sharedWorkers = uint(numCPU)
	} else if s.SharedWorkers > 0 {
		s.sharedWorkers = uint(s.SharedWorkers)
	} else {
		panic("SharedWorkers < 0")
	}
	if l == nil {
		return ErrListener
	} else if s.Handler == nil {
		return ErrHandler
	}
	// Only TCP and Unix listeners expose a file descriptor we can poll;
	// anything else falls back to the generic accept-loop netServer.
	switch netListener := l.(type) {
	case *net.TCPListener:
		if s.file, err = netListener.File(); err != nil {
			l.Close()
			return err
		}
	case *net.UnixListener:
		if s.file, err = netListener.File(); err != nil {
			l.Close()
			return err
		}
	default:
		s.netServer = &netServer{Handler: s.Handler}
		return s.netServer.Serve(l)
	}
	s.fd = int(s.file.Fd())
	s.addr = l.Addr()
	// File() duplicated the fd, so the original listener can be closed;
	// accepting continues on the duplicate.
	l.Close()
	if err := syscall.SetNonblock(s.fd, true); err != nil {
		return err
	}
	if s.poll, err = Create(); err != nil {
		return err
	}
	s.poll.Register(s.fd)
	if !s.NoAsync && s.unsharedWorkers > 0 {
		s.rescheduled = true
	}
	// Create all workers up front: unshared workers (one busy connection
	// each) first, then shared workers, which also go into the min-heap
	// used for least-connection assignment. Shared workers serve events
	// asynchronously unless NoAsync is set.
	for i := 0; i < int(s.unsharedWorkers+s.sharedWorkers); i++ {
		p, err := Create()
		if err != nil {
			return err
		}
		var async bool
		if i >= int(s.unsharedWorkers) && !s.NoAsync {
			async = true
		}
		w := &worker{
			index:  i,
			server: s,
			conns:  make(map[int]*conn),
			poll:   p,
			events: make([]Event, 0x400),
			async:  async,
			done:   make(chan struct{}, 1),
		}
		s.workers = append(s.workers, w)
		if i >= int(s.unsharedWorkers) {
			s.heap = append(s.heap, w)
		}
	}
	s.done = make(chan struct{}, 1)
	var n int
	var events = make([]Event, 1)
	// Accept loop: the only fd registered on s.poll is the listener.
	for err == nil {
		if n, err = s.poll.Wait(events); n > 0 {
			if events[0].Fd == s.fd {
				err = s.accept()
			}
			s.wakeReschedule()
		}
		runtime.Gosched()
	}
	s.wg.Wait()
	return err
}

// accept takes one pending connection off the listener, makes it
// non-blocking, builds its remote address, and registers it with a worker.
// An EAGAIN from Accept is not an error: it just means nothing is pending.
func (s *Server) accept() (err error) {
	nfd, sa, err := syscall.Accept(s.fd)
	if err != nil {
		if err == syscall.EAGAIN {
			return nil
		}
		return err
	}
	if err := syscall.SetNonblock(nfd, true); err != nil {
		return err
	}
	var raddr net.Addr
	switch sockaddr := sa.(type) {
	case *syscall.SockaddrUnix:
		raddr = &net.UnixAddr{Net: "unix", Name: sockaddr.Name}
	case *syscall.SockaddrInet4:
		raddr = &net.TCPAddr{
			IP:   append([]byte{}, sockaddr.Addr[:]...),
			Port: sockaddr.Port,
		}
		// Disable Nagle's algorithm, matching net.TCPConn defaults.
		if err := syscall.SetsockoptInt(nfd, syscall.IPPROTO_TCP, syscall.TCP_NODELAY, 1); err != nil {
			return err
		}
	case *syscall.SockaddrInet6:
		var zone string
		if ifi, err := net.InterfaceByIndex(int(sockaddr.ZoneId)); err == nil {
			zone = ifi.Name
		}
		raddr = &net.TCPAddr{
			IP:   append([]byte{}, sockaddr.Addr[:]...),
			Port: sockaddr.Port,
			Zone: zone,
		}
		if err := syscall.SetsockoptInt(nfd, syscall.IPPROTO_TCP, syscall.TCP_NODELAY, 1); err != nil {
			return err
		}
	}
	s.lock.Lock()
	w := s.assignWorker()
	err = w.register(&conn{w: w, fd: nfd, raddr: raddr, laddr: s.addr})
	s.lock.Unlock()
	return
}

// assignWorker picks an idle unshared worker if one exists, otherwise the
// shared worker with the fewest connections. Caller must hold s.lock.
func (s *Server) assignWorker() (w *worker) {
	if w := s.idleUnsharedWorkers(); w != nil {
		return w
	}
	return s.leastConnectedSharedWorkers()
}

// idleUnsharedWorkers returns the first unshared worker with no
// connections, or nil. Caller must hold s.lock.
// NOTE(review): w.count is written with atomic.AddInt64 but read here
// without atomics — looks like a benign-but-racy read; verify with -race.
func (s *Server) idleUnsharedWorkers() (w *worker) {
	if s.unsharedWorkers > 0 {
		for i := 0; i < int(s.unsharedWorkers); i++ {
			if s.workers[i].count < 1 {
				return s.workers[i]
			}
		}
	}
	return nil
}

// leastConnectedSharedWorkers heapifies the shared workers by connection
// count and returns the minimum. Caller must hold s.lock.
func (s *Server) leastConnectedSharedWorkers() (w *worker) {
	minHeap(s.heap)
	return s.heap[0]
}

// wakeReschedule lazily starts (at most once at a time) a background
// goroutine that rebalances hot connections every 100ms until reschedule
// reports there is nothing left to do, or the server is closed.
func (s *Server) wakeReschedule() {
	if !s.rescheduled {
		return
	}
	s.lock.Lock()
	if !s.wake {
		s.wake = true
		s.lock.Unlock()
		go func() {
			ticker := time.NewTicker(time.Millisecond * 100)
			for {
				select {
				case <-ticker.C:
					s.lock.Lock()
					stop := s.reschedule()
					if stop {
						// No active traffic: stop ticking; a later event
						// in Serve will call wakeReschedule again.
						s.wake = false
						ticker.Stop()
						s.lock.Unlock()
						return
					}
					s.lock.Unlock()
				case <-s.done:
					ticker.Stop()
					return
				}
				runtime.Gosched()
			}
		}()
	} else {
		s.lock.Unlock()
	}
}

// reschedule swaps the busiest connections onto unshared workers and the
// connections currently hogging unshared workers onto shared workers.
// It returns stop=true when there was no traffic at all, signalling the
// ticker goroutine to exit. Caller must hold s.lock.
func (s *Server) reschedule() (stop bool) {
	if !s.rescheduled {
		return
	}
	// Single-flight guard: skip if a reschedule is already in progress.
	if !atomic.CompareAndSwapInt32(&s.rescheduling, 0, 1) {
		return false
	}
	defer atomic.StoreInt32(&s.rescheduling, 0)
	s.adjust = s.adjust[:0]
	s.list = s.list[:0]
	sum := int64(0)
	// Snapshot every running worker's connections. Each conn's read
	// counter becomes its score and is reset for the next period.
	// adjust collects the conns currently on unshared workers.
	for idx, w := range s.workers {
		w.lock.Lock()
		if !w.running {
			w.lock.Unlock()
			continue
		}
		for _, conn := range w.conns {
			if uint(idx) < s.unsharedWorkers {
				s.adjust = append(s.adjust, conn)
			}
			conn.score = atomic.LoadInt64(&conn.count)
			atomic.StoreInt64(&conn.count, 0)
			sum += conn.score
			s.list = append(s.list, conn)
		}
		w.lock.Unlock()
	}
	if len(s.list) == 0 || sum == 0 {
		return true
	}
	unsharedWorkers := s.unsharedWorkers
	if uint(len(s.list)) < s.unsharedWorkers {
		unsharedWorkers = uint(len(s.list))
	}
	// Select the top-K busiest connections into s.list[:unsharedWorkers].
	topK(s.list, int(unsharedWorkers))
	index := 0
	// Partition the top-K: those on shared (async) workers are candidates
	// to move onto unshared workers; those already on unshared workers are
	// removed from adjust so they stay where they are.
	for _, conn := range s.list[:unsharedWorkers] {
		conn.lock.Lock()
		if conn.w.async {
			conn.lock.Unlock()
			s.list[index] = conn
			index++
		} else {
			conn.lock.Unlock()
			if len(s.adjust) > 0 {
				for i := 0; i < len(s.adjust); i++ {
					if conn == s.adjust[i] {
						if i < len(s.adjust)-1 {
							copy(s.adjust[i:], s.adjust[i+1:])
						}
						s.adjust = s.adjust[:len(s.adjust)-1]
						break
					}
				}
			}
		}
	}
	var reschedules = s.list[:index]
	if len(reschedules) == 0 || len(reschedules) != len(s.adjust) {
		return false
	}
	// Pairwise swap: move adjust[i] (cold, on unshared worker) to the
	// shared worker and reschedules[i] (hot, on shared worker) to the
	// unshared worker. Lock order is conn locks first, then both worker
	// locks, consistently in this sequence to avoid deadlock.
	for i := 0; i < len(reschedules); i++ {
		if atomic.LoadInt32(&s.adjust[i].ready) == 0 || atomic.LoadInt32(&reschedules[i].ready) == 0 {
			continue
		}
		s.adjust[i].lock.Lock()
		reschedules[i].lock.Lock()
		unsharedWorker := s.adjust[i].w
		sharedWorker := reschedules[i].w
		unsharedWorker.lock.Lock()
		sharedWorker.lock.Lock()
		unsharedWorker.decrease(s.adjust[i])
		s.adjust[i].w = sharedWorker
		sharedWorker.increase(s.adjust[i])
		sharedWorker.decrease(reschedules[i])
		reschedules[i].w = unsharedWorker
		unsharedWorker.increase(reschedules[i])
		sharedWorker.lock.Unlock()
		unsharedWorker.lock.Unlock()
		s.adjust[i].lock.Unlock()
		reschedules[i].lock.Unlock()
	}
	return false
}

// Close closes the server.
func (s *Server) Close() error {
	if !atomic.CompareAndSwapInt32(&s.closed, 0, 1) {
		return nil
	}
	if s.netServer != nil {
		return s.netServer.Close()
	}
	for i := 0; i < len(s.workers); i++ {
		s.workers[i].Close()
	}
	if err := s.file.Close(); err != nil {
		return err
	}
	if s.done != nil {
		close(s.done)
	}
	return s.poll.Close()
}

// worker owns a poller and a set of connections. Unshared workers serve
// connections synchronously; shared workers may hand events to the
// scheduler (async). A worker goroutine is started lazily by wake and
// exits when it has been idle for idleTime.
type worker struct {
	index  int
	server *Server
	// count is the number of registered connections (atomic).
	count int64
	lock  sync.Mutex
	conns map[int]*conn
	// lastIdle is when the connection count last dropped below one.
	lastIdle time.Time
	poll     *Poll
	events   []Event
	async    bool
	done     chan struct{}
	// running reports whether the worker goroutine is live; guarded by lock.
	running bool
	slept   int32
	closed  int32
}

// run is the worker goroutine body: wait for events, serve them (possibly
// asynchronously), and exit once the worker has been empty for idleTime.
func (w *worker) run(wg *sync.WaitGroup) {
	defer wg.Done()
	var n int
	var err error
	for err == nil {
		n, err = w.poll.Wait(w.events)
		if n > 0 {
			for i := range w.events[:n] {
				// Copy the event so the async closure doesn't alias the
				// reused w.events slice.
				ev := w.events[i]
				if w.async {
					wg.Add(1)
					scheduler.Schedule(func() {
						w.serve(ev)
						wg.Done()
					})
				} else {
					w.serve(ev)
				}
			}
		}
		if atomic.LoadInt64(&w.count) < 1 {
			w.lock.Lock()
			if len(w.conns) == 0 && w.lastIdle.Add(idleTime).Before(time.Now()) {
				w.sleep()
				w.running = false
				w.lock.Unlock()
				return
			}
			w.lock.Unlock()
		}
		runtime.Gosched()
	}
}

// serve dispatches a single poll event to the owning connection.
// Events for unknown fds or not-yet-upgraded connections are dropped.
func (w *worker) serve(ev Event) error {
	fd := ev.Fd
	if fd == 0 {
		return nil
	}
	w.lock.Lock()
	c, ok := w.conns[fd]
	if !ok {
		w.lock.Unlock()
		return nil
	}
	w.lock.Unlock()
	if atomic.LoadInt32(&c.ready) == 0 {
		return nil
	}
	switch ev.Mode {
	case WRITE:
	case READ:
		w.serveConn(c)
	}
	return nil
}

// serveConn drains the connection by calling the handler until it would
// block (EAGAIN). Any other error closes the connection exactly once
// (the closing CAS guards against concurrent servers of the same conn).
func (w *worker) serveConn(c *conn) error {
	for {
		err := w.server.Handler.Serve(c.context)
		if err != nil {
			if err == syscall.EAGAIN {
				return nil
			}
			if !atomic.CompareAndSwapInt32(&c.closing, 0, 1) {
				return nil
			}
			w.Decrease(c)
			c.Close()
			return nil
		}
	}
}

// register adds the connection to this worker, then upgrades it on a
// separate goroutine (the fd is made blocking for Upgrade, then
// non-blocking again). Only after a successful upgrade is the conn marked
// ready and served.
func (w *worker) register(c *conn) error {
	w.Increase(c)
	go func(w *worker, c *conn) {
		var err error
		defer func() {
			if err != nil {
				w.Decrease(c)
				c.Close()
			}
		}()
		if err = syscall.SetNonblock(c.fd, false); err != nil {
			return
		}
		if c.context, err = w.server.Handler.Upgrade(c); err != nil {
			return
		}
		if err = syscall.SetNonblock(c.fd, true); err != nil {
			return
		}
		atomic.StoreInt32(&c.ready, 1)
		w.serveConn(c)
	}(w, c)
	return nil
}

// Increase is the locked wrapper around increase.
func (w *worker) Increase(c *conn) {
	w.lock.Lock()
	w.increase(c)
	w.lock.Unlock()
}

// increase adds the conn to the worker's map and poller and wakes the
// worker goroutine if it is asleep. Caller must hold w.lock.
func (w *worker) increase(c *conn) {
	w.conns[c.fd] = c
	atomic.AddInt64(&w.count, 1)
	w.poll.Register(c.fd)
	w.wake()
}

// Decrease is the locked wrapper around decrease.
func (w *worker) Decrease(c *conn) {
	w.lock.Lock()
	w.decrease(c)
	w.lock.Unlock()
}

// decrease removes the conn from the poller and map, stamping lastIdle
// when the worker becomes empty. Caller must hold w.lock.
func (w *worker) decrease(c *conn) {
	w.poll.Unregister(c.fd)
	delete(w.conns, c.fd)
	if atomic.AddInt64(&w.count, -1) < 1 {
		w.lastIdle = time.Now()
	}
}

// wake starts the worker goroutine if it is not running.
// Caller must hold w.lock.
func (w *worker) wake() {
	if !w.running {
		w.running = true
		w.done = make(chan struct{}, 1)
		atomic.StoreInt32(&w.slept, 0)
		w.server.wg.Add(1)
		go w.run(&w.server.wg)
	}
}

// sleep closes w.done exactly once (guarded by the slept CAS) to signal
// the worker goroutine to stop.
func (w *worker) sleep() {
	if !atomic.CompareAndSwapInt32(&w.slept, 0, 1) {
		return
	}
	close(w.done)
}

// Close shuts the worker down once: closes all its connections,
// signals the goroutine, and closes the poller.
func (w *worker) Close() {
	if !atomic.CompareAndSwapInt32(&w.closed, 0, 1) {
		return
	}
	w.lock.Lock()
	for _, c := range w.conns {
		c.Close()
		delete(w.conns, c.fd)
	}
	w.sleep()
	w.poll.Close()
	w.lock.Unlock()
}

// conn is a non-blocking socket connection owned by a worker.
// It implements net.Conn (deadlines unsupported), io.ReaderFrom and
// syscall.Conn.
type conn struct {
	lock sync.Mutex
	// w is the owning worker; protected by lock (reschedule moves conns
	// between workers).
	w     *worker
	rlock sync.Mutex
	wlock sync.Mutex
	fd    int
	laddr net.Addr
	raddr net.Addr
	// context is the handler's per-connection state from Upgrade.
	context Context
	// ready is set once Upgrade has completed.
	ready int32
	// count tallies reads in the current reschedule period; score is the
	// snapshot taken by Server.reschedule.
	count   int64
	score   int64
	closing int32
	closed  int32
}

// Read reads data from the connection.
func (c *conn) Read(b []byte) (n int, err error) {
	if len(b) == 0 {
		return 0, nil
	}
	// Count reads so the rescheduler can score this connection's activity.
	c.lock.Lock()
	if c.w.server.rescheduled {
		c.lock.Unlock()
		atomic.AddInt64(&c.count, 1)
	} else {
		c.lock.Unlock()
	}
	c.rlock.Lock()
	n, err = syscall.Read(c.fd, b)
	c.rlock.Unlock()
	// Any real error, or a clean zero-byte read, is reported as EOF;
	// EAGAIN is passed through so callers can retry.
	if err != nil && err != syscall.EAGAIN || err == nil && n == 0 {
		err = EOF
	}
	if n < 0 {
		n = 0
	}
	return
}

// Write writes data to the connection.
// NOTE(review): on a non-writable socket this busy-spins on EAGAIN while
// holding wlock (no poll-out registration); and if syscall.Write ever
// returned (0, nil) the loop would never terminate — verify acceptable.
func (c *conn) Write(b []byte) (n int, err error) {
	if len(b) == 0 {
		return 0, nil
	}
	var remain = len(b)
	c.wlock.Lock()
	// Retry until the whole buffer is written; partial writes advance
	// through b, EAGAIN retries, anything else aborts as EOF.
	for remain > 0 {
		n, err = syscall.Write(c.fd, b[len(b)-remain:])
		if n > 0 {
			remain -= n
			continue
		}
		if err != syscall.EAGAIN {
			c.wlock.Unlock()
			return len(b) - remain, EOF
		}
	}
	c.wlock.Unlock()
	return len(b), nil
}

// Close closes the connection.
618 func (c *conn) Close() (err error) { 619 if !atomic.CompareAndSwapInt32(&c.closed, 0, 1) { 620 return 621 } 622 return syscall.Close(c.fd) 623 } 624 625 // LocalAddr returns the local network address. 626 func (c *conn) LocalAddr() net.Addr { 627 return c.laddr 628 } 629 630 // RemoteAddr returns the remote network address. 631 func (c *conn) RemoteAddr() net.Addr { 632 return c.raddr 633 } 634 635 func (c *conn) SetDeadline(t time.Time) error { 636 return errors.New("not supported") 637 } 638 639 func (c *conn) SetReadDeadline(t time.Time) error { 640 return errors.New("not supported") 641 } 642 643 func (c *conn) SetWriteDeadline(t time.Time) error { 644 return errors.New("not supported") 645 } 646 647 func (c *conn) ok() bool { return c != nil && c.fd > 0 && atomic.LoadInt32(&c.closed) == 0 } 648 649 // SyscallConn returns a raw network connection. 650 // This implements the syscall.Conn interface. 651 func (c *conn) SyscallConn() (syscall.RawConn, error) { 652 return &rawConn{uintptr(c.fd), c}, nil 653 } 654 655 // ReadFrom implements the io.ReaderFrom ReadFrom method. 
656 func (c *conn) ReadFrom(r io.Reader) (int64, error) { 657 var remain int64 658 if lr, ok := r.(*io.LimitedReader); ok { 659 remain, r = lr.N, lr.R 660 if remain <= 0 { 661 return 0, nil 662 } 663 } 664 if syscallConn, ok := r.(syscall.Conn); ok { 665 if src, ok := r.(net.Conn); ok { 666 if remain <= 0 { 667 remain = bufferSize 668 } 669 var n int64 670 var err error 671 n, err = splice.Splice(c, src, remain) 672 if err != splice.ErrNotHandled { 673 return n, err 674 } 675 } 676 if raw, err := syscallConn.SyscallConn(); err == nil { 677 var src int 678 raw.Control(func(fd uintptr) { 679 src = int(fd) 680 }) 681 if pos, err := syscall.Seek(src, 0, io.SeekCurrent); err == nil { 682 size, _ := syscall.Seek(src, 0, io.SeekEnd) 683 syscall.Seek(src, pos, io.SeekStart) 684 if remain <= 0 || remain > size-pos { 685 remain = size - pos 686 } 687 if remain <= 0 { 688 return 0, nil 689 } 690 return sendfile.SendFile(c, src, pos, remain) 691 } 692 } 693 } 694 return genericReadFrom(c, r, remain) 695 } 696 697 func genericReadFrom(w io.Writer, r io.Reader, remain int64) (n int64, err error) { 698 if remain < 0 { 699 return 700 } 701 if remain == 0 { 702 remain = bufferSize 703 } else if remain > bufferSize { 704 remain = bufferSize 705 } 706 pool := buffer.AssignPool(int(remain)) 707 buf := pool.GetBuffer(int(remain)) 708 defer pool.PutBuffer(buf) 709 var nr int 710 nr, err = r.Read(buf) 711 if err != nil { 712 return 0, err 713 } 714 var out int 715 var pos int 716 for nr > 0 { 717 out, err = w.Write(buf[pos : pos+nr]) 718 if out > 0 { 719 nr -= out 720 n += int64(out) 721 pos += out 722 continue 723 } 724 if err != syscall.EAGAIN { 725 return n, EOF 726 } 727 } 728 return n, nil 729 } 730 731 type rawConn struct { 732 fd uintptr 733 c *conn 734 } 735 736 func (c *rawConn) Control(f func(fd uintptr)) error { 737 if !c.c.ok() { 738 return syscall.EINVAL 739 } 740 f(c.fd) 741 return nil 742 } 743 744 func (c *rawConn) Read(f func(fd uintptr) (done bool)) error { 745 if 
!c.c.ok() { 746 return syscall.EINVAL 747 } 748 f(c.fd) 749 return nil 750 } 751 752 func (c *rawConn) Write(f func(fd uintptr) (done bool)) error { 753 if !c.c.ok() { 754 return syscall.EINVAL 755 } 756 f(c.fd) 757 return nil 758 } 759 760 type workers []*worker 761 762 func (l workers) Len() int { return len(l) } 763 func (l workers) Less(i, j int) bool { 764 return l[i].count < l[j].count 765 } 766 func (l workers) Swap(i, j int) { l[i], l[j] = l[j], l[i] } 767 768 func minHeap(h workers) { 769 n := h.Len() 770 for i := n/2 - 1; i >= 0; i-- { 771 heapDown(h, i, n) 772 } 773 } 774 775 type list []*conn 776 777 func (l list) Len() int { return len(l) } 778 func (l list) Less(i, j int) bool { 779 return atomic.LoadInt64(&l[i].score) < atomic.LoadInt64(&l[j].score) 780 } 781 func (l list) Swap(i, j int) { l[i], l[j] = l[j], l[i] } 782 783 func topK(h list, k int) { 784 n := h.Len() 785 if k > n { 786 k = n 787 } 788 for i := k/2 - 1; i >= 0; i-- { 789 heapDown(h, i, k) 790 } 791 if k < n { 792 for i := k; i < n; i++ { 793 if h.Less(0, i) { 794 h.Swap(0, i) 795 heapDown(h, 0, k) 796 } 797 } 798 } 799 } 800 801 type sort interface { 802 // Len is the number of elements in the collection. 803 Len() int 804 // Less reports whether the element with 805 // index i should sort before the element with index j. 806 Less(i, j int) bool 807 // Swap swaps the elements with indexes i and j. 808 Swap(i, j int) 809 } 810 811 func heapDown(h sort, i, n int) bool { 812 parent := i 813 for { 814 leftChild := 2*parent + 1 815 if leftChild >= n || leftChild < 0 { // leftChild < 0 after int overflow 816 break 817 } 818 lessChild := leftChild 819 if rightChild := leftChild + 1; rightChild < n && h.Less(rightChild, leftChild) { 820 lessChild = rightChild 821 } 822 if !h.Less(lessChild, parent) { 823 break 824 } 825 h.Swap(parent, lessChild) 826 parent = lessChild 827 } 828 return parent > i 829 }