// POSIX permissions
const (
	PermRWR   os.FileMode = 0o640
	PermRWRR  os.FileMode = 0o644 // (archived)
	PermRWXRX os.FileMode = 0o750

	configDirMode = PermRWXRX | os.ModeDir
)

// ContentLengthUnknown is the sentinel size value (-1).
const ContentLengthUnknown = -1

// PathSeparator is filepath.Separator as a string.
const PathSeparator = string(filepath.Separator)

// readers
type (
	// ReadOpenCloser is an io.ReadCloser that can additionally produce
	// another ReadOpenCloser via Open.
	ReadOpenCloser interface {
		io.ReadCloser
		Open() (ReadOpenCloser, error)
	}
	// ReadSizer is the interface that adds Size method to io.Reader.
	ReadSizer interface {
		io.Reader
		Size() int64
	}
	// ReadCloseSizer is the interface that adds Size method to io.ReadCloser.
	ReadCloseSizer interface {
		io.ReadCloser
		Size() int64
	}
	// ReadOpenCloseSizer is the interface that adds Size method to ReadOpenCloser.
	ReadOpenCloseSizer interface {
		ReadOpenCloser
		Size() int64
	}
	sizedReader struct {
		io.Reader
		size int64
	}

	// implementations

	// nopReader reports `size` bytes as read without ever touching the
	// destination buffer.
	nopReader struct {
		size   int64
		offset int64
	}
	ReadReaderAt interface {
		io.Reader
		io.ReaderAt
	}
	// deferRCS invokes `cb` after the wrapped ReadCloseSizer is closed.
	deferRCS struct {
		ReadCloseSizer
		cb func()
	}
	CallbackROC struct {
		roc          ReadOpenCloser
		readCallback func(int, error)
		// Number of bytes we've already read, counting from last `Open`.
		readBytes int
		// Since we could possibly reopen a reader we must keep track of the
		// bytes we already reported to `readCallback` so there is no duplications.
		// This value is preserved across all the `Open`'s.
		reportedBytes int
	}
	ReaderArgs struct {
		R       io.Reader
		ReadCb  func(int, error)
		DeferCb func()
		Size    int64
	}
	ReaderWithArgs struct {
		args ReaderArgs
	}
	nopOpener struct{ io.ReadCloser }
)

// handles (and more readers)
type (
	FileHandle struct {
		*os.File
		fqn string
	}
	// SectionHandle is a section of reader with optional padding that implements
	// ReadOpenCloser interface.
	SectionHandle struct {
		r         io.ReaderAt
		s         *io.SectionReader
		offset    int64 // slice start
		size      int64 // slice length
		padding   int64 // padding size
		padOffset int64 // offset inside padding when reading a file
	}
	// FileSectionHandle opens a file and reads a section of it with optional
	// padding. It implements the ReadOpenCloser interface.
	FileSectionHandle struct {
		fh  *FileHandle
		sec *SectionHandle
	}
	// ByteHandle is a byte buffer(made from []byte) that implements
	// ReadOpenCloser interface
	ByteHandle struct {
		*bytes.Reader
		b []byte
	}
)

// writers
type (
	WriterAt interface {
		io.Writer
		io.WriterAt
	}
	WriteSizer interface {
		io.Writer
		Size() int64
	}

	WriterMulti struct{ writers []io.Writer }

	// WriterOnly is a helper struct to hide `io.ReaderFrom` interface implementation
	// As far as http.ResponseWriter (and its underlying tcp conn.), the following are tradeoffs:
	// [-] sendfile (when sending), or
	// [-] copy_file_range (when writing local files)
	// [+] use (reusable) buffer, reduce code path, reduce locking
	WriterOnly struct{ io.Writer }

	// common between `Buffer` (below) and `memsys.SGL`
	WriterTo2 interface {
		WriteTo2(dst io.Writer) error
	}
	Buffer struct {
		b *bytes.Buffer
	}
)

// interface guard
var (
	_ io.Reader      = (*nopReader)(nil)
	_ ReadOpenCloser = (*FileHandle)(nil)
	_ ReadOpenCloser = (*CallbackROC)(nil)
	_ ReadSizer      = (*sizedReader)(nil)
	_ ReadOpenCloser = (*SectionHandle)(nil)
	_ ReadOpenCloser = (*FileSectionHandle)(nil)
	_ ReadOpenCloser = (*nopOpener)(nil)
	_ ReadOpenCloser = (*ByteHandle)(nil)
)

// IsEOF reports whether err is (or wraps) io.EOF or io.ErrUnexpectedEOF.
// Including "unexpected EOF" to accommodate unsized streaming and
// early termination of the other side (prior to sending the first byte).
func IsEOF(err error) bool {
	// direct comparison first: cheap fast path for the unwrapped case
	return err == io.EOF || err == io.ErrUnexpectedEOF ||
		errors.Is(err, io.ErrUnexpectedEOF) || errors.Is(err, io.EOF)
}

///////////////
// nopReader //
///////////////

// NopReader returns a reader that reports `size` bytes in total and then
// io.EOF; the caller's buffer is never written. A non-positive size yields
// io.EOF on the first Read.
func NopReader(size int64) io.Reader {
	return &nopReader{size: size}
}

// Read accounts for up to len(b) of the remaining bytes without modifying b.
func (r *nopReader) Read(b []byte) (int, error) {
	left := r.size - r.offset
	if left <= 0 {
		return 0, io.EOF
	}
	n := int(min(int64(len(b)), left))
	r.offset += int64(n)
	return n, nil
}

////////////////
// ByteHandle //
////////////////

// NewByteHandle wraps bt (not copied) into a re-openable reader.
func NewByteHandle(bt []byte) *ByteHandle { return &ByteHandle{bytes.NewReader(bt), bt} }

// Close is a no-op.
func (*ByteHandle) Close() error { return nil }

// Open returns a fresh handle over the same bytes, positioned at the start.
func (b *ByteHandle) Open() (ReadOpenCloser, error) { return NewByteHandle(b.b), nil }

///////////////
// nopOpener //
///////////////

// NopOpener adapts an io.ReadCloser to ReadOpenCloser; Open returns the very
// same (not rewound) reader.
func NopOpener(r io.ReadCloser) ReadOpenCloser      { return &nopOpener{r} }
func (n *nopOpener) Open() (ReadOpenCloser, error) { return n, nil }

////////////////
// FileHandle //
////////////////

// NewFileHandle opens `fqn` read-only and remembers the name so that the
// handle can be reopened.
func NewFileHandle(fqn string) (*FileHandle, error) {
	file, err := os.Open(fqn)
	if err != nil {
		return nil, err
	}
	return &FileHandle{file, fqn}, nil
}

// Open opens the same file name again and returns the new, independent handle.
// On failure the returned ReadOpenCloser is a true nil interface (and not an
// interface that wraps a nil *FileHandle).
func (f *FileHandle) Open() (ReadOpenCloser, error) {
	fh, err := NewFileHandle(f.fqn)
	if err != nil {
		return nil, err
	}
	return fh, nil
}

////////////
// Sized* //
////////////

// NewSizedReader attaches the given size to r; the size is reported as is.
func NewSizedReader(r io.Reader, size int64) ReadSizer { return &sizedReader{r, size} }
func (f *sizedReader) Size() int64                     { return f.size }

//////////////
// deferRCS //
//////////////

// NewDeferRCS returns r wrapped so that cb runs right after r is closed.
// With a nil cb, r itself is returned.
func NewDeferRCS(r ReadCloseSizer, cb func()) ReadCloseSizer {
	if cb == nil {
		return r
	}
	return &deferRCS{r, cb}
}

// Close closes the wrapped reader and then calls the callback (regardless of
// the close result); returns the close error.
func (r *deferRCS) Close() (err error) {
	err = r.ReadCloseSizer.Close()
	r.cb()
	return
}

/////////////////
// CallbackROC //
/////////////////

// NewCallbackReadOpenCloser wraps r so that readCb is told about newly read
// bytes. The optional reportedBytes[0] is the number of bytes that were already
// reported (e.g., by a previous incarnation of the reader) and must not be
// reported again.
func NewCallbackReadOpenCloser(r ReadOpenCloser, readCb func(int, error), reportedBytes ...int) *CallbackROC {
	var rb int
	if len(reportedBytes) > 0 {
		rb = reportedBytes[0]
	}
	return &CallbackROC{
		roc:           r,
		readCallback:  readCb,
		readBytes:     0,
		reportedBytes: rb,
	}
}

// Read reads from the wrapped reader and reports to the callback only those
// bytes that go beyond what has been reported so far.
func (r *CallbackROC) Read(p []byte) (n int, err error) {
	n, err = r.roc.Read(p)
	r.readBytes += n
	if r.readBytes > r.reportedBytes {
		diff := r.readBytes - r.reportedBytes
		// nil callback is tolerated, same as ReaderWithArgs does
		if r.readCallback != nil {
			r.readCallback(diff, err)
		}
		r.reportedBytes += diff
	}
	return n, err
}

// Open reopens the wrapped reader; the new CallbackROC inherits the callback
// and the reported-bytes counter. Returns (nil, err) when reopening fails.
func (r *CallbackROC) Open() (ReadOpenCloser, error) {
	rc, err := r.roc.Open()
	if err != nil {
		return nil, err
	}
	return NewCallbackReadOpenCloser(rc, r.readCallback, r.reportedBytes), nil
}

func (r *CallbackROC) Close() error { return r.roc.Close() }

////////////////////
// ReaderWithArgs //
////////////////////

func NewReaderWithArgs(args ReaderArgs) *ReaderWithArgs {
	return &ReaderWithArgs{args: args}
}

// Size returns args.Size as provided at construction.
func (r *ReaderWithArgs) Size() int64 { return r.args.Size }

// Read reads from args.R and passes the result of every read to args.ReadCb, if set.
func (r *ReaderWithArgs) Read(p []byte) (n int, err error) {
	n, err = r.args.R.Read(p)
	if r.args.ReadCb != nil {
		r.args.ReadCb(n, err)
	}
	return n, err
}

// Open is not supported and panics.
func (*ReaderWithArgs) Open() (ReadOpenCloser, error) { panic("not supported") }

// Close closes args.R when it happens to be an io.ReadCloser and then runs
// args.DeferCb, if set; returns the close error, if any.
func (r *ReaderWithArgs) Close() (err error) {
	if rc, ok := r.args.R.(io.ReadCloser); ok {
		err = rc.Close()
	}
	if r.args.DeferCb != nil {
		r.args.DeferCb()
	}
	return err
}

///////////////////
// SectionHandle //
///////////////////

// NewSectionHandle returns a handle that reads `size` bytes of r starting at
// `offset`, followed by `padding` zero bytes.
func NewSectionHandle(r io.ReaderAt, offset, size, padding int64) *SectionHandle {
	sec := io.NewSectionReader(r, offset, size)
	return &SectionHandle{r, sec, offset, size, padding, 0}
}

// Open returns a new handle over the same section, positioned at its start.
func (f *SectionHandle) Open() (ReadOpenCloser, error) {
	return NewSectionHandle(f.r, f.offset, f.size, f.padding), nil
}

// Reads a reader section. When the slice finishes but the buffer is not filled
// yet, act as if it reads a few more bytes from somewhere (zeros, up to the
// configured padding). io.EOF is returned only when both the section and the
// padding have been fully consumed.
func (f *SectionHandle) Read(buf []byte) (n int, err error) {
	// padding has not started yet: keep reading the section itself
	if f.padOffset == 0 {
		n, err = f.s.Read(buf)
		if err != nil && err != io.EOF {
			return n, err
		}
		if f.padding <= 0 {
			return n, err
		}
		// from here on the section's own EOF is not the end of this reader:
		// there is padding to serve
	}

	left := f.padding - f.padOffset
	if left <= 0 {
		return n, io.EOF
	}
	fromPad := min(int64(len(buf)-n), left)
	if fromPad == 0 {
		// no room in the buffer; the remaining padding goes to the next call
		return n, nil
	}

	clear(buf[n : n+int(fromPad)])
	n += int(fromPad)
	f.padOffset += fromPad

	if f.padOffset < f.padding {
		return n, nil
	}
	return n, io.EOF
}

// Close is a no-op: the handle does not own the underlying io.ReaderAt.
func (*SectionHandle) Close() error { return nil }

///////////////////////
// FileSectionHandle //
///////////////////////

// NewFileSectionHandle opens file which is expected at `fqn` and defines
// a SectionHandle on it to only read a specified section.
func NewFileSectionHandle(fqn string, offset, size int64) (*FileSectionHandle, error) {
	fh, err := NewFileHandle(fqn)
	if err != nil {
		return nil, err
	}
	sec := NewSectionHandle(fh, offset, size, 0)
	return &FileSectionHandle{fh: fh, sec: sec}, nil
}

// Open opens the file again and returns a new handle over the same section.
// On failure the returned ReadOpenCloser is a true nil interface.
func (f *FileSectionHandle) Open() (ReadOpenCloser, error) {
	fsh, err := NewFileSectionHandle(f.fh.fqn, f.sec.offset, f.sec.size)
	if err != nil {
		return nil, err
	}
	return fsh, nil
}

func (f *FileSectionHandle) Read(buf []byte) (int, error) { return f.sec.Read(buf) }
func (f *FileSectionHandle) Close() error                 { return f.fh.Close() }

/////////////////
// WriterMulti //
/////////////////

func NewWriterMulti(w ...io.Writer) *WriterMulti { return &WriterMulti{w} }

// Write writes b to every writer, in order. It stops at the first writer that
// fails or writes fewer than len(b) bytes and returns that writer's count and
// error (io.ErrShortWrite when the short write came without an error).
// Otherwise returns len(b) and nil.
func (mw *WriterMulti) Write(b []byte) (n int, err error) {
	l := len(b)
	for _, w := range mw.writers {
		n, err = w.Write(b)
		if err == nil && n == l {
			continue
		}
		if err == nil {
			err = io.ErrShortWrite
		}
		return
	}
	n = l
	return
}

////////////
// Buffer //
////////////

// NewBuffer returns a Buffer whose initial content is b (see bytes.NewBuffer).
func NewBuffer(b []byte) *Buffer {
	return &Buffer{b: bytes.NewBuffer(b)}
}

// WriteTo2 writes the buffered content to dst; only the error is returned.
func (w *Buffer) WriteTo2(dst io.Writer) (err error) {
	_, err = w.b.WriteTo(dst)
	return err
}

///////////////////////
// misc file and dir //
///////////////////////

// ExpandPath replaces common abbreviations in file path (eg. `~` with absolute
// path to the current user home directory) and cleans the path.
// Only a bare `~` or a leading `~/` is expanded; `~name/...` is left alone.
// When the current user cannot be determined the path is cleaned and returned
// unexpanded.
func ExpandPath(path string) string {
	if path == "" || path[0] != '~' {
		return filepath.Clean(path)
	}
	if len(path) > 1 && path[1] != '/' {
		return filepath.Clean(path)
	}

	currentUser, err := user.Current()
	if err != nil {
		return filepath.Clean(path)
	}
	return filepath.Clean(filepath.Join(currentUser.HomeDir, path[1:]))
}

// CreateDir creates directory if does not exist.
451 // If the directory already exists returns nil. 452 func CreateDir(dir string) error { 453 return os.MkdirAll(dir, configDirMode) 454 } 455 456 // CreateFile creates a new write-only (O_WRONLY) file with default cos.PermRWR permissions. 457 // NOTE: if the file pathname doesn't exist it'll be created. 458 // NOTE: if the file already exists it'll be also silently truncated. 459 func CreateFile(fqn string) (*os.File, error) { 460 if err := CreateDir(filepath.Dir(fqn)); err != nil { 461 return nil, err 462 } 463 return os.OpenFile(fqn, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, PermRWR) 464 } 465 466 // (creates destination directory if doesn't exist) 467 func Rename(src, dst string) (err error) { 468 err = os.Rename(src, dst) 469 if err == nil || !os.IsNotExist(err) { 470 return 471 } 472 // create and retry (slow path) 473 err = CreateDir(filepath.Dir(dst)) 474 if err == nil { 475 err = os.Rename(src, dst) 476 } 477 return 478 } 479 480 // RemoveFile removes path; returns nil upon success or if the path does not exist. 
481 func RemoveFile(path string) (err error) { 482 err = os.Remove(path) 483 if os.IsNotExist(err) { 484 err = nil 485 } 486 return 487 } 488 489 // and computes checksum if requested 490 func CopyFile(src, dst string, buf []byte, cksumType string) (written int64, cksum *CksumHash, err error) { 491 var srcFile, dstFile *os.File 492 if srcFile, err = os.Open(src); err != nil { 493 return 494 } 495 if dstFile, err = CreateFile(dst); err != nil { 496 nlog.Errorln("Failed to create", dst+":", err) 497 Close(srcFile) 498 return 499 } 500 written, cksum, err = CopyAndChecksum(dstFile, srcFile, buf, cksumType) 501 Close(srcFile) 502 defer func() { 503 if err == nil { 504 return 505 } 506 if nestedErr := RemoveFile(dst); nestedErr != nil { 507 nlog.Errorf("Nested (%v): failed to remove %s, err: %v", err, dst, nestedErr) 508 } 509 }() 510 if err != nil { 511 nlog.Errorln("Failed to copy", src, "=>", dst+":", err) 512 Close(dstFile) 513 return 514 } 515 if err = FlushClose(dstFile); err != nil { 516 nlog.Errorln("Failed to flush and close", dst+":", err) 517 } 518 return 519 } 520 521 func SaveReaderSafe(tmpfqn, fqn string, reader io.Reader, buf []byte, cksumType string, size int64) (cksum *CksumHash, 522 err error) { 523 if cksum, err = SaveReader(tmpfqn, reader, buf, cksumType, size); err != nil { 524 return 525 } 526 if err = Rename(tmpfqn, fqn); err != nil { 527 os.Remove(tmpfqn) 528 } 529 return 530 } 531 532 // Saves the reader directly to `fqn`, checksums if requested 533 func SaveReader(fqn string, reader io.Reader, buf []byte, cksumType string, size int64) (cksum *CksumHash, err error) { 534 var ( 535 written int64 536 file, erc = CreateFile(fqn) 537 writer = WriterOnly{file} // Hiding `ReadFrom` for `*os.File` introduced in Go1.15. 
538 ) 539 if erc != nil { 540 return nil, erc 541 } 542 defer func() { 543 if err != nil { 544 os.Remove(fqn) 545 } 546 }() 547 548 if size >= 0 { 549 reader = io.LimitReader(reader, size) 550 } 551 written, cksum, err = CopyAndChecksum(writer, reader, buf, cksumType) 552 erc = file.Close() 553 554 if err != nil { 555 err = fmt.Errorf("failed to save to %q: %w", fqn, err) 556 return 557 } 558 if size >= 0 && written != size { 559 err = fmt.Errorf("wrong size when saving to %q: expected %d, got %d", fqn, size, written) 560 return 561 } 562 if erc != nil { 563 err = fmt.Errorf("failed to close %q: %w", fqn, erc) 564 return 565 } 566 return 567 } 568 569 // a slightly modified excerpt from https://github.com/golang/go/blob/master/src/io/io.go#L407 570 // - regular streaming copy with `io.WriteTo` and `io.ReaderFrom` not checked and not used 571 // - buffer _must_ be provided 572 // - see also: WriterOnly comment (above) 573 func CopyBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) { 574 for { 575 nr, er := src.Read(buf) 576 if nr > 0 { 577 nw, ew := dst.Write(buf[0:nr]) 578 if ew != nil { 579 if nw > 0 && nw <= nr { 580 written += int64(nw) 581 } 582 err = ew 583 break 584 } 585 if nw < 0 || nw > nr { 586 err = errors.New("cos.CopyBuffer: invalid write") 587 break 588 } 589 written += int64(nw) 590 if nr != nw { 591 err = io.ErrShortWrite 592 break 593 } 594 } 595 if er != nil { 596 if er != io.EOF { 597 err = er 598 } 599 break 600 } 601 } 602 return written, err 603 } 604 605 // Read only the first line of a file. 606 // Do not use for big files: it reads all the content and then extracts the first 607 // line. 
Use for files that may contains a few lines with trailing EOL 608 func ReadOneLine(filename string) (string, error) { 609 var line string 610 err := ReadLines(filename, func(l string) error { 611 line = l 612 return io.EOF 613 }) 614 return line, err 615 } 616 617 // Read only the first line of a file and return it as uint64 618 // Do not use for big files: it reads all the content and then extracts the first 619 // line. Use for files that may contains a few lines with trailing EOL 620 func ReadOneUint64(filename string) (uint64, error) { 621 line, err := ReadOneLine(filename) 622 if err != nil { 623 return 0, err 624 } 625 val, err := strconv.ParseUint(line, 10, 64) 626 return val, err 627 } 628 629 // Read only the first line of a file and return it as int64 630 // Do not use for big files: it reads all the content and then extracts the first 631 // line. Use for files that may contains a few lines with trailing EOL 632 func ReadOneInt64(filename string) (int64, error) { 633 line, err := ReadOneLine(filename) 634 if err != nil { 635 return 0, err 636 } 637 val, err := strconv.ParseInt(line, 10, 64) 638 return val, err 639 } 640 641 // Read a file line by line and call a callback for each line until the file 642 // ends or a callback returns io.EOF 643 func ReadLines(filename string, cb func(string) error) error { 644 b, err := os.ReadFile(filename) 645 if err != nil { 646 return err 647 } 648 649 lineReader := bufio.NewReader(bytes.NewBuffer(b)) 650 for { 651 line, _, err := lineReader.ReadLine() 652 if err != nil { 653 if err == io.EOF { 654 err = nil 655 } 656 return err 657 } 658 659 if err := cb(string(line)); err != nil { 660 if err != io.EOF { 661 return err 662 } 663 break 664 } 665 } 666 return nil 667 } 668 669 // CopyAndChecksum reads from `r` and writes to `w`; returns num bytes copied and checksum, or error 670 func CopyAndChecksum(w io.Writer, r io.Reader, buf []byte, cksumType string) (n int64, cksum *CksumHash, err error) { 671 debug.Assert(w != 
io.Discard || buf == nil) // io.Discard is io.ReaderFrom 672 673 if cksumType == ChecksumNone || cksumType == "" { 674 n, err = io.CopyBuffer(w, r, buf) 675 return n, nil, err 676 } 677 678 cksum = NewCksumHash(cksumType) 679 var mw io.Writer = cksum.H 680 if w != io.Discard { 681 mw = NewWriterMulti(cksum.H, w) 682 } 683 n, err = io.CopyBuffer(mw, r, buf) 684 cksum.Finalize() 685 return n, cksum, err 686 } 687 688 // ChecksumBytes computes checksum of given bytes using additional buffer. 689 func ChecksumBytes(b []byte, cksumType string) (cksum *Cksum, err error) { 690 _, hash, err := CopyAndChecksum(io.Discard, bytes.NewReader(b), nil, cksumType) 691 if err != nil { 692 return nil, err 693 } 694 return &hash.Cksum, nil 695 } 696 697 // DrainReader reads and discards all the data from a reader. 698 // No need for `io.CopyBuffer` as `io.Discard` has efficient `io.ReaderFrom` implementation. 699 func DrainReader(r io.Reader) { 700 _, err := io.Copy(io.Discard, r) 701 if err == nil || IsEOF(err) { 702 return 703 } 704 debug.AssertNoErr(err) 705 } 706 707 // FloodWriter writes `n` random bytes to provided writer. 
708 func FloodWriter(w io.Writer, n int64) error { 709 _, err := io.CopyN(w, NowRand(), n) 710 return err 711 } 712 713 func Close(closer io.Closer) { 714 err := closer.Close() 715 debug.AssertNoErr(err) 716 } 717 718 func FlushClose(file *os.File) (err error) { 719 err = fflush(file) 720 debug.AssertNoErr(err) 721 err = file.Close() 722 debug.AssertNoErr(err) 723 return 724 } 725 726 // NOTE: 727 // - file.Close() is implementation dependent as far as flushing dirty buffers; 728 // - journaling filesystems, such as xfs, generally provide better guarantees but, again, not 100% 729 // - see discussion at https://lwn.net/Articles/788938; 730 // - going forward, some sort of `rename_barrier()` would be a much better alternative 731 // - doesn't work in testing environment - currently disabled, see #1141 and comments 732 733 const fsyncDisabled = true 734 735 func fflush(file *os.File) (err error) { 736 if fsyncDisabled { 737 return 738 } 739 return file.Sync() 740 }