gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/internal/poll/fd_unix.go (about) 1 // Copyright 2017 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build aix || darwin || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris 6 // +build aix darwin dragonfly freebsd js,wasm linux netbsd openbsd solaris 7 8 package poll 9 10 import ( 11 "io" 12 "sync/atomic" 13 "syscall" 14 ) 15 16 // FD is a file descriptor. The net and os packages use this type as a 17 // field of a larger type representing a network connection or OS file. 18 type FD struct { 19 // Lock sysfd and serialize access to Read and Write methods. 20 fdmu fdMutex 21 22 // System file descriptor. Immutable until Close. 23 Sysfd int 24 25 // I/O poller. 26 pd pollDesc 27 28 // Writev cache. 29 iovecs *[]syscall.Iovec 30 31 // Semaphore signaled when file is closed. 32 csema uint32 33 34 // Non-zero if this file has been set to blocking mode. 35 isBlocking uint32 36 37 // Whether this is a streaming descriptor, as opposed to a 38 // packet-based descriptor like a UDP socket. Immutable. 39 IsStream bool 40 41 // Whether a zero byte read indicates EOF. This is false for a 42 // message based socket connection. 43 ZeroReadIsEOF bool 44 45 // Whether this is a file rather than a network socket. 46 isFile bool 47 } 48 49 // Init initializes the FD. The Sysfd field should already be set. 50 // This can be called multiple times on a single FD. 51 // The net argument is a network name from the net package (e.g., "tcp"), 52 // or "file". 53 // Set pollable to true if fd should be managed by runtime netpoll. 54 func (fd *FD) Init(net string, pollable bool) error { 55 // We don't actually care about the various network types. 56 if net == "file" { 57 fd.isFile = true 58 } 59 if !pollable { 60 fd.isBlocking = 1 61 return nil 62 } 63 err := fd.pd.init(fd) 64 if err != nil { 65 // If we could not initialize the runtime poller, 66 // assume we are using blocking mode. 67 fd.isBlocking = 1 68 } 69 return err 70 } 71 72 // Destroy closes the file descriptor. This is called when there are 73 // no remaining references. 74 func (fd *FD) destroy() error { 75 // Poller may want to unregister fd in readiness notification mechanism, 76 // so this must be executed before CloseFunc. 77 fd.pd.close() 78 79 // We don't use ignoringEINTR here because POSIX does not define 80 // whether the descriptor is closed if close returns EINTR. 81 // If the descriptor is indeed closed, using a loop would race 82 // with some other goroutine opening a new descriptor. 83 // (The Linux kernel guarantees that it is closed on an EINTR error.) 84 err := CloseFunc(fd.Sysfd) 85 86 fd.Sysfd = -1 87 runtime_Semrelease(&fd.csema) 88 return err 89 } 90 91 // Close closes the FD. The underlying file descriptor is closed by the 92 // destroy method when there are no remaining references. 93 func (fd *FD) Close() error { 94 if !fd.fdmu.increfAndClose() { 95 return errClosing(fd.isFile) 96 } 97 98 // Unblock any I/O. Once it all unblocks and returns, 99 // so that it cannot be referring to fd.sysfd anymore, 100 // the final decref will close fd.sysfd. This should happen 101 // fairly quickly, since all the I/O is non-blocking, and any 102 // attempts to block in the pollDesc will return errClosing(fd.isFile). 103 fd.pd.evict() 104 105 // The call to decref will call destroy if there are no other 106 // references. 107 err := fd.decref() 108 109 // Wait until the descriptor is closed. If this was the only 110 // reference, it is already closed. Only wait if the file has 111 // not been set to blocking mode, as otherwise any current I/O 112 // may be blocking, and that would block the Close. 113 // No need for an atomic read of isBlocking, increfAndClose means 114 // we have exclusive access to fd. 115 if fd.isBlocking == 0 { 116 runtime_Semacquire(&fd.csema) 117 } 118 119 return err 120 } 121 122 // SetBlocking puts the file into blocking mode. 123 func (fd *FD) SetBlocking() error { 124 if err := fd.incref(); err != nil { 125 return err 126 } 127 defer fd.decref() 128 // Atomic store so that concurrent calls to SetBlocking 129 // do not cause a race condition. isBlocking only ever goes 130 // from 0 to 1 so there is no real race here. 131 atomic.StoreUint32(&fd.isBlocking, 1) 132 return syscall.SetNonblock(fd.Sysfd, false) 133 } 134 135 // Darwin and FreeBSD can't read or write 2GB+ files at a time, 136 // even on 64-bit systems. 137 // The same is true of socket implementations on many systems. 138 // See golang.org/issue/7812 and golang.org/issue/16266. 139 // Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned. 140 const maxRW = 1 << 30 141 142 // Read implements io.Reader. 143 func (fd *FD) Read(p []byte) (int, error) { 144 if err := fd.readLock(); err != nil { 145 return 0, err 146 } 147 defer fd.readUnlock() 148 if len(p) == 0 { 149 // If the caller wanted a zero byte read, return immediately 150 // without trying (but after acquiring the readLock). 151 // Otherwise syscall.Read returns 0, nil which looks like 152 // io.EOF. 153 // TODO(bradfitz): make it wait for readability? (Issue 15735) 154 return 0, nil 155 } 156 if err := fd.pd.prepareRead(fd.isFile); err != nil { 157 return 0, err 158 } 159 if fd.IsStream && len(p) > maxRW { 160 p = p[:maxRW] 161 } 162 for { 163 n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p) 164 if err != nil { 165 n = 0 166 if err == syscall.EAGAIN && fd.pd.pollable() { 167 if err = fd.pd.waitRead(fd.isFile); err == nil { 168 continue 169 } 170 } 171 } 172 err = fd.eofError(n, err) 173 return n, err 174 } 175 } 176 177 // Pread wraps the pread system call. 178 func (fd *FD) Pread(p []byte, off int64) (int, error) { 179 // Call incref, not readLock, because since pread specifies the 180 // offset it is independent from other reads. 181 // Similarly, using the poller doesn't make sense for pread. 182 if err := fd.incref(); err != nil { 183 return 0, err 184 } 185 if fd.IsStream && len(p) > maxRW { 186 p = p[:maxRW] 187 } 188 var ( 189 n int 190 err error 191 ) 192 for { 193 n, err = syscall.Pread(fd.Sysfd, p, off) 194 if err != syscall.EINTR { 195 break 196 } 197 } 198 if err != nil { 199 n = 0 200 } 201 fd.decref() 202 err = fd.eofError(n, err) 203 return n, err 204 } 205 206 // ReadFrom wraps the recvfrom network call. 207 func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) { 208 if err := fd.readLock(); err != nil { 209 return 0, nil, err 210 } 211 defer fd.readUnlock() 212 if err := fd.pd.prepareRead(fd.isFile); err != nil { 213 return 0, nil, err 214 } 215 for { 216 n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0) 217 if err != nil { 218 if err == syscall.EINTR { 219 continue 220 } 221 n = 0 222 if err == syscall.EAGAIN && fd.pd.pollable() { 223 if err = fd.pd.waitRead(fd.isFile); err == nil { 224 continue 225 } 226 } 227 } 228 err = fd.eofError(n, err) 229 return n, sa, err 230 } 231 } 232 233 // ReadMsg wraps the recvmsg network call. 234 func (fd *FD) ReadMsg(p []byte, oob []byte, flags int) (int, int, int, syscall.Sockaddr, error) { 235 if err := fd.readLock(); err != nil { 236 return 0, 0, 0, nil, err 237 } 238 defer fd.readUnlock() 239 if err := fd.pd.prepareRead(fd.isFile); err != nil { 240 return 0, 0, 0, nil, err 241 } 242 for { 243 n, oobn, sysflags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, flags) 244 if err != nil { 245 if err == syscall.EINTR { 246 continue 247 } 248 // TODO(dfc) should n and oobn be set to 0 249 if err == syscall.EAGAIN && fd.pd.pollable() { 250 if err = fd.pd.waitRead(fd.isFile); err == nil { 251 continue 252 } 253 } 254 } 255 err = fd.eofError(n, err) 256 return n, oobn, sysflags, sa, err 257 } 258 } 259 260 // Write implements io.Writer. 261 func (fd *FD) Write(p []byte) (int, error) { 262 if err := fd.writeLock(); err != nil { 263 return 0, err 264 } 265 defer fd.writeUnlock() 266 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 267 return 0, err 268 } 269 var nn int 270 for { 271 max := len(p) 272 if fd.IsStream && max-nn > maxRW { 273 max = nn + maxRW 274 } 275 n, err := ignoringEINTRIO(syscall.Write, fd.Sysfd, p[nn:max]) 276 if n > 0 { 277 nn += n 278 } 279 if nn == len(p) { 280 return nn, err 281 } 282 if err == syscall.EAGAIN && fd.pd.pollable() { 283 if err = fd.pd.waitWrite(fd.isFile); err == nil { 284 continue 285 } 286 } 287 if err != nil { 288 return nn, err 289 } 290 if n == 0 { 291 return nn, io.ErrUnexpectedEOF 292 } 293 } 294 } 295 296 // Pwrite wraps the pwrite system call. 297 func (fd *FD) Pwrite(p []byte, off int64) (int, error) { 298 // Call incref, not writeLock, because since pwrite specifies the 299 // offset it is independent from other writes. 300 // Similarly, using the poller doesn't make sense for pwrite. 301 if err := fd.incref(); err != nil { 302 return 0, err 303 } 304 defer fd.decref() 305 var nn int 306 for { 307 max := len(p) 308 if fd.IsStream && max-nn > maxRW { 309 max = nn + maxRW 310 } 311 n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn)) 312 if err == syscall.EINTR { 313 continue 314 } 315 if n > 0 { 316 nn += n 317 } 318 if nn == len(p) { 319 return nn, err 320 } 321 if err != nil { 322 return nn, err 323 } 324 if n == 0 { 325 return nn, io.ErrUnexpectedEOF 326 } 327 } 328 } 329 330 // WriteTo wraps the sendto network call. 331 func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) { 332 if err := fd.writeLock(); err != nil { 333 return 0, err 334 } 335 defer fd.writeUnlock() 336 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 337 return 0, err 338 } 339 for { 340 err := syscall.Sendto(fd.Sysfd, p, 0, sa) 341 if err == syscall.EINTR { 342 continue 343 } 344 if err == syscall.EAGAIN && fd.pd.pollable() { 345 if err = fd.pd.waitWrite(fd.isFile); err == nil { 346 continue 347 } 348 } 349 if err != nil { 350 return 0, err 351 } 352 return len(p), nil 353 } 354 } 355 356 // WriteMsg wraps the sendmsg network call. 357 func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) { 358 if err := fd.writeLock(); err != nil { 359 return 0, 0, err 360 } 361 defer fd.writeUnlock() 362 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 363 return 0, 0, err 364 } 365 for { 366 n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0) 367 if err == syscall.EINTR { 368 continue 369 } 370 if err == syscall.EAGAIN && fd.pd.pollable() { 371 if err = fd.pd.waitWrite(fd.isFile); err == nil { 372 continue 373 } 374 } 375 if err != nil { 376 return n, 0, err 377 } 378 return n, len(oob), err 379 } 380 } 381 382 // Accept wraps the accept network call. 383 func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) { 384 if err := fd.readLock(); err != nil { 385 return -1, nil, "", err 386 } 387 defer fd.readUnlock() 388 389 if err := fd.pd.prepareRead(fd.isFile); err != nil { 390 return -1, nil, "", err 391 } 392 for { 393 s, rsa, errcall, err := accept(fd.Sysfd) 394 if err == nil { 395 return s, rsa, "", err 396 } 397 switch err { 398 case syscall.EINTR: 399 continue 400 case syscall.EAGAIN: 401 if fd.pd.pollable() { 402 if err = fd.pd.waitRead(fd.isFile); err == nil { 403 continue 404 } 405 } 406 case syscall.ECONNABORTED: 407 // This means that a socket on the listen 408 // queue was closed before we Accept()ed it; 409 // it's a silly error, so try again. 410 continue 411 } 412 return -1, nil, errcall, err 413 } 414 } 415 416 // Seek wraps syscall.Seek. 417 func (fd *FD) Seek(offset int64, whence int) (int64, error) { 418 if err := fd.incref(); err != nil { 419 return 0, err 420 } 421 defer fd.decref() 422 return syscall.Seek(fd.Sysfd, offset, whence) 423 } 424 425 // ReadDirent wraps syscall.ReadDirent. 426 // We treat this like an ordinary system call rather than a call 427 // that tries to fill the buffer. 428 func (fd *FD) ReadDirent(buf []byte) (int, error) { 429 if err := fd.incref(); err != nil { 430 return 0, err 431 } 432 defer fd.decref() 433 for { 434 n, err := ignoringEINTRIO(syscall.ReadDirent, fd.Sysfd, buf) 435 if err != nil { 436 n = 0 437 if err == syscall.EAGAIN && fd.pd.pollable() { 438 if err = fd.pd.waitRead(fd.isFile); err == nil { 439 continue 440 } 441 } 442 } 443 // Do not call eofError; caller does not expect to see io.EOF. 444 return n, err 445 } 446 } 447 448 // Fchmod wraps syscall.Fchmod. 449 func (fd *FD) Fchmod(mode uint32) error { 450 if err := fd.incref(); err != nil { 451 return err 452 } 453 defer fd.decref() 454 return ignoringEINTR(func() error { 455 return syscall.Fchmod(fd.Sysfd, mode) 456 }) 457 } 458 459 // Fchdir wraps syscall.Fchdir. 460 func (fd *FD) Fchdir() error { 461 if err := fd.incref(); err != nil { 462 return err 463 } 464 defer fd.decref() 465 return syscall.Fchdir(fd.Sysfd) 466 } 467 468 // Fstat wraps syscall.Fstat 469 func (fd *FD) Fstat(s *syscall.Stat_t) error { 470 if err := fd.incref(); err != nil { 471 return err 472 } 473 defer fd.decref() 474 return ignoringEINTR(func() error { 475 return syscall.Fstat(fd.Sysfd, s) 476 }) 477 } 478 479 // tryDupCloexec indicates whether F_DUPFD_CLOEXEC should be used. 480 // If the kernel doesn't support it, this is set to 0. 481 var tryDupCloexec = int32(1) 482 483 // DupCloseOnExec dups fd and marks it close-on-exec. 484 func DupCloseOnExec(fd int) (int, string, error) { 485 if syscall.F_DUPFD_CLOEXEC != 0 && atomic.LoadInt32(&tryDupCloexec) == 1 { 486 r0, e1 := fcntl(fd, syscall.F_DUPFD_CLOEXEC, 0) 487 if e1 == nil { 488 return r0, "", nil 489 } 490 switch e1.(syscall.Errno) { 491 case syscall.EINVAL, syscall.ENOSYS: 492 // Old kernel, or js/wasm (which returns 493 // ENOSYS). Fall back to the portable way from 494 // now on. 495 atomic.StoreInt32(&tryDupCloexec, 0) 496 default: 497 return -1, "fcntl", e1 498 } 499 } 500 return dupCloseOnExecOld(fd) 501 } 502 503 // dupCloseOnExecOld is the traditional way to dup an fd and 504 // set its O_CLOEXEC bit, using two system calls. 505 func dupCloseOnExecOld(fd int) (int, string, error) { 506 syscall.ForkLock.RLock() 507 defer syscall.ForkLock.RUnlock() 508 newfd, err := syscall.Dup(fd) 509 if err != nil { 510 return -1, "dup", err 511 } 512 syscall.CloseOnExec(newfd) 513 return newfd, "", nil 514 } 515 516 // Dup duplicates the file descriptor. 517 func (fd *FD) Dup() (int, string, error) { 518 if err := fd.incref(); err != nil { 519 return -1, "", err 520 } 521 defer fd.decref() 522 return DupCloseOnExec(fd.Sysfd) 523 } 524 525 // On Unix variants only, expose the IO event for the net code. 526 527 // WaitWrite waits until data can be read from fd. 528 func (fd *FD) WaitWrite() error { 529 return fd.pd.waitWrite(fd.isFile) 530 } 531 532 // WriteOnce is for testing only. It makes a single write call. 533 func (fd *FD) WriteOnce(p []byte) (int, error) { 534 if err := fd.writeLock(); err != nil { 535 return 0, err 536 } 537 defer fd.writeUnlock() 538 return ignoringEINTRIO(syscall.Write, fd.Sysfd, p) 539 } 540 541 // RawRead invokes the user-defined function f for a read operation. 542 func (fd *FD) RawRead(f func(uintptr) bool) error { 543 if err := fd.readLock(); err != nil { 544 return err 545 } 546 defer fd.readUnlock() 547 if err := fd.pd.prepareRead(fd.isFile); err != nil { 548 return err 549 } 550 for { 551 if f(uintptr(fd.Sysfd)) { 552 return nil 553 } 554 if err := fd.pd.waitRead(fd.isFile); err != nil { 555 return err 556 } 557 } 558 } 559 560 // RawWrite invokes the user-defined function f for a write operation. 561 func (fd *FD) RawWrite(f func(uintptr) bool) error { 562 if err := fd.writeLock(); err != nil { 563 return err 564 } 565 defer fd.writeUnlock() 566 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 567 return err 568 } 569 for { 570 if f(uintptr(fd.Sysfd)) { 571 return nil 572 } 573 if err := fd.pd.waitWrite(fd.isFile); err != nil { 574 return err 575 } 576 } 577 } 578 579 // ignoringEINTRIO is like ignoringEINTR, but just for IO calls. 580 func ignoringEINTRIO(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) { 581 for { 582 n, err := fn(fd, p) 583 if err != syscall.EINTR { 584 return n, err 585 } 586 } 587 }