github.com/sandwichdev/go-internals@v0.0.0-20210605002614-12311ac6b2c5/poll/fd_unix.go (about) 1 // Copyright 2017 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build aix darwin dragonfly freebsd js,wasm linux netbsd openbsd solaris 6 7 package poll 8 9 import ( 10 "io" 11 "sync/atomic" 12 "syscall" 13 ) 14 15 // FD is a file descriptor. The net and os packages use this type as a 16 // field of a larger type representing a network connection or OS file. 17 type FD struct { 18 // Lock sysfd and serialize access to Read and Write methods. 19 fdmu fdMutex 20 21 // System file descriptor. Immutable until Close. 22 Sysfd int 23 24 // I/O poller. 25 pd pollDesc 26 27 // Writev cache. 28 iovecs *[]syscall.Iovec 29 30 // Semaphore signaled when file is closed. 31 csema uint32 32 33 // Non-zero if this file has been set to blocking mode. 34 isBlocking uint32 35 36 // Whether this is a streaming descriptor, as opposed to a 37 // packet-based descriptor like a UDP socket. Immutable. 38 IsStream bool 39 40 // Whether a zero byte read indicates EOF. This is false for a 41 // message based socket connection. 42 ZeroReadIsEOF bool 43 44 // Whether this is a file rather than a network socket. 45 isFile bool 46 } 47 48 // Init initializes the FD. The Sysfd field should already be set. 49 // This can be called multiple times on a single FD. 50 // The net argument is a network name from the net package (e.g., "tcp"), 51 // or "file". 52 // Set pollable to true if fd should be managed by runtime netpoll. 53 func (fd *FD) Init(net string, pollable bool) error { 54 // We don't actually care about the various network types. 55 if net == "file" { 56 fd.isFile = true 57 } 58 if !pollable { 59 fd.isBlocking = 1 60 return nil 61 } 62 err := fd.pd.init(fd) 63 if err != nil { 64 // If we could not initialize the runtime poller, 65 // assume we are using blocking mode. 66 fd.isBlocking = 1 67 } 68 return err 69 } 70 71 // Destroy closes the file descriptor. This is called when there are 72 // no remaining references. 73 func (fd *FD) destroy() error { 74 // Poller may want to unregister fd in readiness notification mechanism, 75 // so this must be executed before CloseFunc. 76 fd.pd.close() 77 78 // We don't use ignoringEINTR here because POSIX does not define 79 // whether the descriptor is closed if close returns EINTR. 80 // If the descriptor is indeed closed, using a loop would race 81 // with some other goroutine opening a new descriptor. 82 // (The Linux kernel guarantees that it is closed on an EINTR error.) 83 err := CloseFunc(fd.Sysfd) 84 85 fd.Sysfd = -1 86 runtime_Semrelease(&fd.csema) 87 return err 88 } 89 90 // Close closes the FD. The underlying file descriptor is closed by the 91 // destroy method when there are no remaining references. 92 func (fd *FD) Close() error { 93 if !fd.fdmu.increfAndClose() { 94 return errClosing(fd.isFile) 95 } 96 97 // Unblock any I/O. Once it all unblocks and returns, 98 // so that it cannot be referring to fd.sysfd anymore, 99 // the final decref will close fd.sysfd. This should happen 100 // fairly quickly, since all the I/O is non-blocking, and any 101 // attempts to block in the pollDesc will return errClosing(fd.isFile). 102 fd.pd.evict() 103 104 // The call to decref will call destroy if there are no other 105 // references. 106 err := fd.decref() 107 108 // Wait until the descriptor is closed. If this was the only 109 // reference, it is already closed. Only wait if the file has 110 // not been set to blocking mode, as otherwise any current I/O 111 // may be blocking, and that would block the Close. 112 // No need for an atomic read of isBlocking, increfAndClose means 113 // we have exclusive access to fd. 114 if fd.isBlocking == 0 { 115 runtime_Semacquire(&fd.csema) 116 } 117 118 return err 119 } 120 121 // SetBlocking puts the file into blocking mode. 122 func (fd *FD) SetBlocking() error { 123 if err := fd.incref(); err != nil { 124 return err 125 } 126 defer fd.decref() 127 // Atomic store so that concurrent calls to SetBlocking 128 // do not cause a race condition. isBlocking only ever goes 129 // from 0 to 1 so there is no real race here. 130 atomic.StoreUint32(&fd.isBlocking, 1) 131 return syscall.SetNonblock(fd.Sysfd, false) 132 } 133 134 // Darwin and FreeBSD can't read or write 2GB+ files at a time, 135 // even on 64-bit systems. 136 // The same is true of socket implementations on many systems. 137 // See golang.org/issue/7812 and golang.org/issue/16266. 138 // Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned. 139 const maxRW = 1 << 30 140 141 // Read implements io.Reader. 142 func (fd *FD) Read(p []byte) (int, error) { 143 if err := fd.readLock(); err != nil { 144 return 0, err 145 } 146 defer fd.readUnlock() 147 if len(p) == 0 { 148 // If the caller wanted a zero byte read, return immediately 149 // without trying (but after acquiring the readLock). 150 // Otherwise syscall.Read returns 0, nil which looks like 151 // io.EOF. 152 // TODO(bradfitz): make it wait for readability? (Issue 15735) 153 return 0, nil 154 } 155 if err := fd.pd.prepareRead(fd.isFile); err != nil { 156 return 0, err 157 } 158 if fd.IsStream && len(p) > maxRW { 159 p = p[:maxRW] 160 } 161 for { 162 n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p) 163 if err != nil { 164 n = 0 165 if err == syscall.EAGAIN && fd.pd.pollable() { 166 if err = fd.pd.waitRead(fd.isFile); err == nil { 167 continue 168 } 169 } 170 } 171 err = fd.eofError(n, err) 172 return n, err 173 } 174 } 175 176 // Pread wraps the pread system call. 177 func (fd *FD) Pread(p []byte, off int64) (int, error) { 178 // Call incref, not readLock, because since pread specifies the 179 // offset it is independent from other reads. 180 // Similarly, using the poller doesn't make sense for pread. 181 if err := fd.incref(); err != nil { 182 return 0, err 183 } 184 if fd.IsStream && len(p) > maxRW { 185 p = p[:maxRW] 186 } 187 var ( 188 n int 189 err error 190 ) 191 for { 192 n, err = syscall.Pread(fd.Sysfd, p, off) 193 if err != syscall.EINTR { 194 break 195 } 196 } 197 if err != nil { 198 n = 0 199 } 200 fd.decref() 201 err = fd.eofError(n, err) 202 return n, err 203 } 204 205 // ReadFrom wraps the recvfrom network call. 206 func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) { 207 if err := fd.readLock(); err != nil { 208 return 0, nil, err 209 } 210 defer fd.readUnlock() 211 if err := fd.pd.prepareRead(fd.isFile); err != nil { 212 return 0, nil, err 213 } 214 for { 215 n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0) 216 if err != nil { 217 if err == syscall.EINTR { 218 continue 219 } 220 n = 0 221 if err == syscall.EAGAIN && fd.pd.pollable() { 222 if err = fd.pd.waitRead(fd.isFile); err == nil { 223 continue 224 } 225 } 226 } 227 err = fd.eofError(n, err) 228 return n, sa, err 229 } 230 } 231 232 // ReadMsg wraps the recvmsg network call. 233 func (fd *FD) ReadMsg(p []byte, oob []byte) (int, int, int, syscall.Sockaddr, error) { 234 if err := fd.readLock(); err != nil { 235 return 0, 0, 0, nil, err 236 } 237 defer fd.readUnlock() 238 if err := fd.pd.prepareRead(fd.isFile); err != nil { 239 return 0, 0, 0, nil, err 240 } 241 for { 242 n, oobn, flags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, 0) 243 if err != nil { 244 if err == syscall.EINTR { 245 continue 246 } 247 // TODO(dfc) should n and oobn be set to 0 248 if err == syscall.EAGAIN && fd.pd.pollable() { 249 if err = fd.pd.waitRead(fd.isFile); err == nil { 250 continue 251 } 252 } 253 } 254 err = fd.eofError(n, err) 255 return n, oobn, flags, sa, err 256 } 257 } 258 259 // Write implements io.Writer. 260 func (fd *FD) Write(p []byte) (int, error) { 261 if err := fd.writeLock(); err != nil { 262 return 0, err 263 } 264 defer fd.writeUnlock() 265 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 266 return 0, err 267 } 268 var nn int 269 for { 270 max := len(p) 271 if fd.IsStream && max-nn > maxRW { 272 max = nn + maxRW 273 } 274 n, err := ignoringEINTRIO(syscall.Write, fd.Sysfd, p[nn:max]) 275 if n > 0 { 276 nn += n 277 } 278 if nn == len(p) { 279 return nn, err 280 } 281 if err == syscall.EAGAIN && fd.pd.pollable() { 282 if err = fd.pd.waitWrite(fd.isFile); err == nil { 283 continue 284 } 285 } 286 if err != nil { 287 return nn, err 288 } 289 if n == 0 { 290 return nn, io.ErrUnexpectedEOF 291 } 292 } 293 } 294 295 // Pwrite wraps the pwrite system call. 296 func (fd *FD) Pwrite(p []byte, off int64) (int, error) { 297 // Call incref, not writeLock, because since pwrite specifies the 298 // offset it is independent from other writes. 299 // Similarly, using the poller doesn't make sense for pwrite. 300 if err := fd.incref(); err != nil { 301 return 0, err 302 } 303 defer fd.decref() 304 var nn int 305 for { 306 max := len(p) 307 if fd.IsStream && max-nn > maxRW { 308 max = nn + maxRW 309 } 310 n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn)) 311 if err == syscall.EINTR { 312 continue 313 } 314 if n > 0 { 315 nn += n 316 } 317 if nn == len(p) { 318 return nn, err 319 } 320 if err != nil { 321 return nn, err 322 } 323 if n == 0 { 324 return nn, io.ErrUnexpectedEOF 325 } 326 } 327 } 328 329 // WriteTo wraps the sendto network call. 330 func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) { 331 if err := fd.writeLock(); err != nil { 332 return 0, err 333 } 334 defer fd.writeUnlock() 335 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 336 return 0, err 337 } 338 for { 339 err := syscall.Sendto(fd.Sysfd, p, 0, sa) 340 if err == syscall.EINTR { 341 continue 342 } 343 if err == syscall.EAGAIN && fd.pd.pollable() { 344 if err = fd.pd.waitWrite(fd.isFile); err == nil { 345 continue 346 } 347 } 348 if err != nil { 349 return 0, err 350 } 351 return len(p), nil 352 } 353 } 354 355 // WriteMsg wraps the sendmsg network call. 356 func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) { 357 if err := fd.writeLock(); err != nil { 358 return 0, 0, err 359 } 360 defer fd.writeUnlock() 361 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 362 return 0, 0, err 363 } 364 for { 365 n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0) 366 if err == syscall.EINTR { 367 continue 368 } 369 if err == syscall.EAGAIN && fd.pd.pollable() { 370 if err = fd.pd.waitWrite(fd.isFile); err == nil { 371 continue 372 } 373 } 374 if err != nil { 375 return n, 0, err 376 } 377 return n, len(oob), err 378 } 379 } 380 381 // Accept wraps the accept network call. 382 func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) { 383 if err := fd.readLock(); err != nil { 384 return -1, nil, "", err 385 } 386 defer fd.readUnlock() 387 388 if err := fd.pd.prepareRead(fd.isFile); err != nil { 389 return -1, nil, "", err 390 } 391 for { 392 s, rsa, errcall, err := accept(fd.Sysfd) 393 if err == nil { 394 return s, rsa, "", err 395 } 396 switch err { 397 case syscall.EINTR: 398 continue 399 case syscall.EAGAIN: 400 if fd.pd.pollable() { 401 if err = fd.pd.waitRead(fd.isFile); err == nil { 402 continue 403 } 404 } 405 case syscall.ECONNABORTED: 406 // This means that a socket on the listen 407 // queue was closed before we Accept()ed it; 408 // it's a silly error, so try again. 409 continue 410 } 411 return -1, nil, errcall, err 412 } 413 } 414 415 // Seek wraps syscall.Seek. 416 func (fd *FD) Seek(offset int64, whence int) (int64, error) { 417 if err := fd.incref(); err != nil { 418 return 0, err 419 } 420 defer fd.decref() 421 return syscall.Seek(fd.Sysfd, offset, whence) 422 } 423 424 // ReadDirent wraps syscall.ReadDirent. 425 // We treat this like an ordinary system call rather than a call 426 // that tries to fill the buffer. 427 func (fd *FD) ReadDirent(buf []byte) (int, error) { 428 if err := fd.incref(); err != nil { 429 return 0, err 430 } 431 defer fd.decref() 432 for { 433 n, err := ignoringEINTRIO(syscall.ReadDirent, fd.Sysfd, buf) 434 if err != nil { 435 n = 0 436 if err == syscall.EAGAIN && fd.pd.pollable() { 437 if err = fd.pd.waitRead(fd.isFile); err == nil { 438 continue 439 } 440 } 441 } 442 // Do not call eofError; caller does not expect to see io.EOF. 443 return n, err 444 } 445 } 446 447 // Fchmod wraps syscall.Fchmod. 448 func (fd *FD) Fchmod(mode uint32) error { 449 if err := fd.incref(); err != nil { 450 return err 451 } 452 defer fd.decref() 453 return ignoringEINTR(func() error { 454 return syscall.Fchmod(fd.Sysfd, mode) 455 }) 456 } 457 458 // Fchdir wraps syscall.Fchdir. 459 func (fd *FD) Fchdir() error { 460 if err := fd.incref(); err != nil { 461 return err 462 } 463 defer fd.decref() 464 return syscall.Fchdir(fd.Sysfd) 465 } 466 467 // Fstat wraps syscall.Fstat 468 func (fd *FD) Fstat(s *syscall.Stat_t) error { 469 if err := fd.incref(); err != nil { 470 return err 471 } 472 defer fd.decref() 473 return ignoringEINTR(func() error { 474 return syscall.Fstat(fd.Sysfd, s) 475 }) 476 } 477 478 // tryDupCloexec indicates whether F_DUPFD_CLOEXEC should be used. 479 // If the kernel doesn't support it, this is set to 0. 480 var tryDupCloexec = int32(1) 481 482 // DupCloseOnExec dups fd and marks it close-on-exec. 483 func DupCloseOnExec(fd int) (int, string, error) { 484 if syscall.F_DUPFD_CLOEXEC != 0 && atomic.LoadInt32(&tryDupCloexec) == 1 { 485 r0, e1 := fcntl(fd, syscall.F_DUPFD_CLOEXEC, 0) 486 if e1 == nil { 487 return r0, "", nil 488 } 489 switch e1.(syscall.Errno) { 490 case syscall.EINVAL, syscall.ENOSYS: 491 // Old kernel, or js/wasm (which returns 492 // ENOSYS). Fall back to the portable way from 493 // now on. 494 atomic.StoreInt32(&tryDupCloexec, 0) 495 default: 496 return -1, "fcntl", e1 497 } 498 } 499 return dupCloseOnExecOld(fd) 500 } 501 502 // dupCloseOnExecOld is the traditional way to dup an fd and 503 // set its O_CLOEXEC bit, using two system calls. 504 func dupCloseOnExecOld(fd int) (int, string, error) { 505 syscall.ForkLock.RLock() 506 defer syscall.ForkLock.RUnlock() 507 newfd, err := syscall.Dup(fd) 508 if err != nil { 509 return -1, "dup", err 510 } 511 syscall.CloseOnExec(newfd) 512 return newfd, "", nil 513 } 514 515 // Dup duplicates the file descriptor. 516 func (fd *FD) Dup() (int, string, error) { 517 if err := fd.incref(); err != nil { 518 return -1, "", err 519 } 520 defer fd.decref() 521 return DupCloseOnExec(fd.Sysfd) 522 } 523 524 // On Unix variants only, expose the IO event for the net code. 525 526 // WaitWrite waits until data can be read from fd. 527 func (fd *FD) WaitWrite() error { 528 return fd.pd.waitWrite(fd.isFile) 529 } 530 531 // WriteOnce is for testing only. It makes a single write call. 532 func (fd *FD) WriteOnce(p []byte) (int, error) { 533 if err := fd.writeLock(); err != nil { 534 return 0, err 535 } 536 defer fd.writeUnlock() 537 return ignoringEINTRIO(syscall.Write, fd.Sysfd, p) 538 } 539 540 // RawRead invokes the user-defined function f for a read operation. 541 func (fd *FD) RawRead(f func(uintptr) bool) error { 542 if err := fd.readLock(); err != nil { 543 return err 544 } 545 defer fd.readUnlock() 546 if err := fd.pd.prepareRead(fd.isFile); err != nil { 547 return err 548 } 549 for { 550 if f(uintptr(fd.Sysfd)) { 551 return nil 552 } 553 if err := fd.pd.waitRead(fd.isFile); err != nil { 554 return err 555 } 556 } 557 } 558 559 // RawWrite invokes the user-defined function f for a write operation. 560 func (fd *FD) RawWrite(f func(uintptr) bool) error { 561 if err := fd.writeLock(); err != nil { 562 return err 563 } 564 defer fd.writeUnlock() 565 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 566 return err 567 } 568 for { 569 if f(uintptr(fd.Sysfd)) { 570 return nil 571 } 572 if err := fd.pd.waitWrite(fd.isFile); err != nil { 573 return err 574 } 575 } 576 } 577 578 // ignoringEINTRIO is like ignoringEINTR, but just for IO calls. 579 func ignoringEINTRIO(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) { 580 for { 581 n, err := fn(fd, p) 582 if err != syscall.EINTR { 583 return n, err 584 } 585 } 586 }